[Python-checkins] python/dist/src/Lib/email Header.py,1.17.6.1,1.17.6.2

Tim Peters tim.one@comcast.net
Sun, 02 Mar 2003 14:00:58 -0500


> Modified Files:
>       Tag: folding-reimpl-branch
> 	Header.py
> Log Message:
> Experimental binary search for a better split point.

...

> + def _binsplit(splittable, charset, maxlinelen):
> +     i = lastm = 0
> +     j = len(splittable) - 1
> +     while True:
> +         if j < i:
> +             break
> +         m = (i + j) / 2
> +         chunk = charset.from_splittable(splittable[:m], True)
> +         chunklen = charset.encoded_header_len(chunk)
> +         if chunklen < maxlinelen:
> +             lastm = m
> +             i = m + 1
> +         elif chunklen > maxlinelen:
> +             j = m - 1
> +         else:
> +             break
> +     first = charset.from_splittable(splittable[:lastm], False)
> +     last = charset.from_splittable(splittable[lastm:], False)
> +     return first, last

I'm not entirely sure what this is doing, but if I grok it I trust this
version more <wink>:

def _binsplit(splittable, charset, maxlinelen):
    i, j = 0, len(splittable)
    while i < j:
        # Invariants:
        # 1. splittable[:k] fits for all k <= i (note that we *assume*,
        #    at the start, that splittable[:0] fits).
        # 2. splittable[:k] does not fit for any k > j (at the start,
        #    this means we shouldn't look at any k > len(splittable)).
        # 3. We don't know about splittable[:k] for k in i+1..j.
        # 4. We want to set i to the largest k that fits, with i <= k <= j.

        m = (i+j+1) >> 1  # ceiling((i+j)/2); i < m <= j
        chunk = charset.from_splittable(splittable[:m], True)
        chunklen = charset.encoded_header_len(chunk)
        if chunklen <= maxlinelen:
            # m is acceptable, so is a new lower bound.
            i = m
        else:
            # m is not acceptable, so final i must be < j.
            j = m-1
    # i == j.  Invariant #1 implies that splittable[:i] fits, and
    # invariant #2 implies that splittable[:i+1] does not fit, so i
    # is what we're looking for.
    first = charset.from_splittable(splittable[:i], False)
    last  = charset.from_splittable(splittable[i:], False)
    return first, last

You could break out early when chunklen == maxlinelen, but it's not
necessary and *usually* slows binary searches.