Underscore/Camelcase insensitivity

Mon Feb 24 20:09:48 EST 2003

m.faassen at vet.uu.nl (Martijn Faassen) writes:
> the camelCase convention. This week I was pondering making a mixin class which
> helps us transition underscore to camelcase automatically. So, in a way
> I was even serious here.
> 
> 

I've actually discovered that converting to and from camelCase is much more
of a pain than you'd think (I might just be doing it stupidly). Anyway,
here's the horribly convoluted code I came up with some time ago, in case it
is of any use for your enterprise.

alex

import string
# Character sets used to classify the characters of name parts:
#   _UP      -- uppercase letters
#   _lo      -- lowercase letters
#   _UP_0_9  -- uppercase letters plus the decimal digits
# camelJoin tests the last character of a part against _UP / _UP_0_9 with
# `in`; camelSplit passes _UP and _lo to replaceStrs when building its regex.
# NOTE(review): string.uppercase / string.lowercase are locale-dependent in
# Python 2 and do not exist in Python 3's string module; string.ascii_uppercase
# / string.ascii_lowercase may be what is wanted here -- confirm before porting.
_UP = string.uppercase               # XXX is that what we should use?
_lo = string.lowercase
_UP_0_9 = _UP + string.digits
def camelJoin(strs, capitalizeFirst=True):
    """Join `strs` to a CamelCasedWord.

    Rules applied to the parts when `strs` has two or more elements:

    * A non-final part that ends in an uppercase letter is kept verbatim; a
      "_" is appended to it when the following part ends in an uppercase
      letter or a digit (so two verbatim parts do not run together).
    * Any other non-final part is passed through ``str.capitalize()``.
    * The final part is kept verbatim when it ends in an uppercase letter or
      a digit, and is passed through ``str.capitalize()`` otherwise.

    A single-element `strs` is used as-is (it is *not* capitalized), and an
    empty `strs` yields the empty string. The caller's sequence is never
    modified.

    Arguments:
    strs            -- sequence of non-empty strings (must support ``len``).
    capitalizeFirst -- when false, the first resulting part is lowercased
                       in its entirety before joining.

    Returns the joined string.
    """
    if len(strs) == 0:
        # Nothing to join. Without this guard the pairwise loop below never
        # runs and the "last element" variable would be read while unbound.
        return ""
    if len(strs) == 1:
        # Work on a copy: the lowercasing step below assigns to parts[0] and
        # must not write through to the caller's list (or fail on a tuple).
        parts = list(strs)
    else:
        parts = []
        for this, following in xwindow(strs):
            if this[-1] in _UP:
                if following[-1] in _UP_0_9:
                    # Both parts stay verbatim; keep them apart with a "_".
                    parts.append(this + "_")
                else:
                    parts.append(this)
            else:
                parts.append(this.capitalize())
        # After the loop `following` is the final element of `strs`, which
        # the pairwise iteration has not appended yet.
        if following[-1] in _UP_0_9:
            parts.append(following)
        else:
            parts.append(following.capitalize())
    if not capitalizeFirst:
        parts[0] = parts[0].lower()
    return "".join(parts)

def camelSplit(name, strict=True):
    """Split a CamelCase name into its components. Retains capitalization
       information where it isn't used to separate words. Retains leading and
       trailing 'magic' underbars.

       Arguments:
       name   -- the identifier to split.
       strict -- when true, the splitting regex is applied to the whole
                 middle section at once; when false, the middle section is
                 first cut at underscores and the regex is applied to each
                 piece, the results being concatenated.

       Returns the components with stand-alone "_" separator entries
       filtered out; leading/trailing underscores of `name` stay attached
       to the first/last component. A `name` consisting only of underscores
       is returned as a single component, and an empty `name` gives an
       empty list.

       Raises ValueError if the pieces found do not add up to the full
       length of `name` (i.e. some characters were not matched).
    """
    # NOTE(review): the global statement is not needed for the read-only use
    # of _UP and _lo below.
    global _UP, _lo
    # Peel off leading and trailing underscore runs; `mid` is what remains.
    # Groups that did not participate in the match come back as None.
    leading_bars , mid, trailing_bars = \
            re.match(r'(_+)?(.*?)(_+)?$', name).groups()
    if not mid: # very special case: name ~= /_*/
        # Indexing with the boolean: [] for an empty name, [name] otherwise.
        return [[name], []][not name]
    elif mid.isupper():
        # All-uppercase name: words are separated by underscores only.
        # The capturing group keeps one "_" per separator in the result.
        # HACK the '_*' bit is to have consecutive '_'s fail
        # (the extra underscores are dropped, so the length check at the end
        # raises ValueError)
        split = re.split('(_)_*', mid)
    elif "_" not in mid and name.islower():
        # A single all-lowercase word: nothing to split.
        split = [mid]
    else:
        # need this for trailing and leading bars, that should be preserved
        # Verbose-mode pattern template; the "UP" and "lo" placeholders are
        # handed to replaceStrs together with _UP and _lo (presumably a
        # textual substitution -- replaceStrs is defined elsewhere, confirm).
        r = r'''    ^[lo][lo\d]*(?=[UP]|$)
                    |
                     [UP][UP\d]*(?=_[UP\d])
                    |
                     [UP][lo][lo\d]*
                    |
                     [UP][UP\d]*(?=[UP][lo]|$)
                    |
                    _ # need to keep this to reconstruct case for 1 char parts
                 '''
        splitRE = re.compile(replaceStrs(r, ("UP", _UP), ("lo", _lo)), re.X)
        if strict:    
            split = splitRE.findall(mid)
        else:
            # Cut at underscores first, then concatenate the per-piece
            # findall results into one flat list.
            split = reduce(operator.add, map(splitRE.findall,
                                             re.split('(_)_*', mid)))
        def normCase(t):
            # `t` is a (previous, current) pair from xwindow; only the
            # current element is used. A part containing two or more
            # consecutive A-Z letters is kept as is, anything else is
            # lowercased.
            previous, x = t
            if (re.search(r'[A-Z]{2,}',x)): return x
            else: return x.lower()
        # NB: split[0] is *not* normalized
        # (xwindow yields one pair per element after the first, so the mapped
        # results line up with split[1:])
        split[1:] = map(normCase, xwindow(split))
    # Re-attach the underscores peeled off at the start.
    split[0]  =  (leading_bars or "") + split[0]
    split[-1] += (trailing_bars or "")
    # Sanity check: every character of `name` must be accounted for.
    if reduce(int.__add__, map(len,split),0) != len(name):
        raise ValueError("Illegal CamelCase name supplied: %s" % name)
    # kick out all '_'s, except "magic" trailing and leading ones
    return filter("_".__ne__, split)

def xwindow(iter, n=2, s=1):
    r"""Slide a window of `n` items (default 2) across `iter`, advancing it
    by `s` items (default 1) after each full window.

    Generator: yields each full window as a tuple. Nothing is yielded when
    `iter` supplies fewer than `n` items. `n` must not be smaller than `s`.
    """
    assert n >= s
    window = []
    for item in iter:
        window.append(item)
        if len(window) != n:
            # Window not filled yet; keep collecting.
            continue
        yield tuple(window)
        # Drop the first `s` items so the window moves forward.
        window = window[s:]