[Python-checkins] python/dist/src/Lib _strptime.py,1.10,1.11
tim_one@users.sourceforge.net
tim_one@users.sourceforge.net
Sat, 18 Jan 2003 20:40:46 -0800
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv15164/python/Lib
Modified Files:
_strptime.py
Log Message:
SF patch 670194: Performance enhancement for _strptime.py.
From Brett Cannon. Mostly speedups via caching format string ->
compiled regexp.
Index: _strptime.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/_strptime.py,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** _strptime.py 18 Jan 2003 03:53:49 -0000 1.10
--- _strptime.py 19 Jan 2003 04:40:44 -0000 1.11
***************
*** 25,29 ****
from re import compile as re_compile
from re import IGNORECASE
- from string import whitespace as whitespace_string
__author__ = "Brett Cannon"
--- 25,28 ----
***************
*** 34,37 ****
--- 33,47 ----
RegexpType = type(re_compile(''))
+ def _getlang():
+ # Figure out what the current language is set to.
+ current_lang = locale.getlocale(locale.LC_TIME)[0]
+ if current_lang:
+ return current_lang
+ else:
+ current_lang = locale.getdefaultlocale()[0]
+ if current_lang:
+ return current_lang
+ else:
+ return ''
class LocaleTime(object):
***************
*** 286,302 ****
def __calc_lang(self):
! # Set self.__lang by using locale.getlocale() or
! # locale.getdefaultlocale(). If both turn up empty, set the attribute
! # to ''. This is to stop calls to this method and to make sure
! # strptime() can produce an re object correctly.
! current_lang = locale.getlocale(locale.LC_TIME)[0]
! if current_lang:
! self.__lang = current_lang
! else:
! current_lang = locale.getdefaultlocale()[0]
! if current_lang:
! self.__lang = current_lang
! else:
! self.__lang = ''
--- 296,302 ----
def __calc_lang(self):
! # Set self.__lang by using _getlang().
! self.__lang = _getlang()
!
***************
*** 383,388 ****
"""Return re pattern for the format string."""
processed_format = ''
! for whitespace in whitespace_string:
! format = format.replace(whitespace, r'\s*')
while format.find('%') != -1:
directive_index = format.index('%')+1
--- 383,388 ----
"""Return re pattern for the format string."""
processed_format = ''
! whitespace_replacement = re_compile('\s+')
! format = whitespace_replacement.sub('\s*', format)
while format.find('%') != -1:
directive_index = format.index('%')+1
***************
*** 395,407 ****
def compile(self, format):
"""Return a compiled re object for the format string."""
- format = "(?#%s)%s" % (self.locale_time.lang,format)
return re_compile(self.pattern(format), IGNORECASE)
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
"""Return a time struct based on the input data and the format string."""
! locale_time = LocaleTime()
! compiled_re = TimeRE(locale_time).compile(format)
! found = compiled_re.match(data_string)
if not found:
raise ValueError("time data did not match format")
--- 395,423 ----
def compile(self, format):
"""Return a compiled re object for the format string."""
return re_compile(self.pattern(format), IGNORECASE)
+ # Cached TimeRE; probably only need one instance ever so cache it for performance
+ _locale_cache = TimeRE()
+ # Cached regex objects; same reason as for TimeRE cache
+ _regex_cache = dict()
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
"""Return a time struct based on the input data and the format string."""
! global _locale_cache
! global _regex_cache
! locale_time = _locale_cache.locale_time
! # If the language changes, caches are invalidated, so clear them
! if locale_time.lang != _getlang():
! _locale_cache = TimeRE()
! _regex_cache.clear()
! format_regex = _regex_cache.get(format)
! if not format_regex:
! # Limit regex cache size to prevent major bloating of the module;
! # The value 5 is arbitrary
! if len(_regex_cache) > 5:
! _regex_cache.clear()
! format_regex = _locale_cache.compile(format)
! _regex_cache[format] = format_regex
! found = format_regex.match(data_string)
if not found:
raise ValueError("time data did not match format")