gh-96346: Use double caching for re._compile() #96347
Changes from all commits (91515495fe3232d619f6d077e6ddbd618bf)
@@ -229,6 +229,7 @@ def compile(pattern, flags=0):
 def purge():
     "Clear the regular expression caches"
     _cache.clear()
+    _cache2.clear()
     _compile_repl.cache_clear()

 def template(pattern, flags=0):
@@ -266,40 +267,64 @@ def escape(pattern):
 # --------------------------------------------------------------------
 # internals

-_cache = {}  # ordered!
+# Use the fact that dict keeps the insertion order.
+# _cache2 uses the simple FIFO policy which has better latency.
+# _cache uses the LRU policy which has better hit rate.
+_cache = {}   # LRU
+_cache2 = {}  # FIFO
 _MAXCACHE = 512
+_MAXCACHE2 = 256
+assert _MAXCACHE2 < _MAXCACHE

 def _compile(pattern, flags):
     # internal: compile pattern
     if isinstance(flags, RegexFlag):
         flags = flags.value
     try:
-        return _cache[type(pattern), pattern, flags]
+        return _cache2[type(pattern), pattern, flags]
     except KeyError:
         pass
-    if isinstance(pattern, Pattern):
-        if flags:
-            raise ValueError(
-                "cannot process flags argument with a compiled pattern")
-        return pattern
-    if not _compiler.isstring(pattern):
-        raise TypeError("first argument must be string or compiled pattern")
-    if flags & T:
-        import warnings
-        warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
-                      "as it is an undocumented flag "
-                      "without an obvious purpose. "
-                      "Don't use it.",
-                      DeprecationWarning)
-    p = _compiler.compile(pattern, flags)
-    if not (flags & DEBUG):
+
+    key = (type(pattern), pattern, flags)
+    # Item in _cache should be moved to the end if found.
+    p = _cache.pop(key, None)
[Review thread on the "p = _cache.pop(key, None)" line]
Member: Why does it remove it from the cache?
Author: Because we need to move it to the end. (See the move-to-end demonstration after the diffs.)
+    if p is None:
+        if isinstance(pattern, Pattern):
+            if flags:
+                raise ValueError(
+                    "cannot process flags argument with a compiled pattern")
+            return pattern
+        if not _compiler.isstring(pattern):
+            raise TypeError("first argument must be string or compiled pattern")
+        if flags & T:
+            import warnings
+            warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
+                          "as it is an undocumented flag "
+                          "without an obvious purpose. "
+                          "Don't use it.",
+                          DeprecationWarning)
+        p = _compiler.compile(pattern, flags)
+        if flags & DEBUG:
+            return p
         if len(_cache) >= _MAXCACHE:
-            # Drop the oldest item
+            # Drop the least recently used item.
+            # next(iter(_cache)) is known to have linear amortized time,
+            # but it is used here to avoid a dependency from using OrderedDict.
+            # For the small _MAXCACHE value it doesn't make much of a difference.
             try:
                 del _cache[next(iter(_cache))]
             except (StopIteration, RuntimeError, KeyError):
                 pass
-        _cache[type(pattern), pattern, flags] = p
+    # Append to the end.
+    _cache[key] = p
+    if len(_cache2) >= _MAXCACHE2:
+        # Drop the oldest item.
+        try:
+            del _cache2[next(iter(_cache2))]
+        except (StopIteration, RuntimeError, KeyError):
+            pass
+    _cache2[key] = p
     return p

 @functools.lru_cache(_MAXCACHE)
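For readers skimming the diff, the scheme is easier to see outside the surrounding error handling. Below is a condensed, self-contained sketch of the same two-level lookup, not the stdlib code: the names get_cached, compute, MAX_LRU and MAX_FIFO are made up for illustration, and the try/except blocks that the real _compile() uses to tolerate concurrent mutation are omitted.

    # Illustrative sketch of the FIFO-in-front-of-LRU scheme from the diff above.
    # All names here are hypothetical stand-ins, not the identifiers used in re.
    _lru = {}    # LRU: hits are re-inserted at the end, evict from the front
    _fifo = {}   # FIFO: never reordered, evict from the front; cheap to probe

    MAX_LRU = 512
    MAX_FIFO = 256

    def compute(key):
        # Stand-in for the expensive operation (pattern compilation in re).
        return f"compiled<{key}>"

    def get_cached(key):
        # Fast path: a single dict probe, no reordering, minimal latency.
        try:
            return _fifo[key]
        except KeyError:
            pass
        # Slow path: the LRU cache.  pop() removes the entry so that
        # re-inserting it below moves it to the end (most recently used).
        value = _lru.pop(key, None)
        if value is None:
            value = compute(key)
            if len(_lru) >= MAX_LRU:
                # Evict the least recently used entry: the first key,
                # because dicts preserve insertion order.
                del _lru[next(iter(_lru))]
        _lru[key] = value            # append (or re-append) at the end
        if len(_fifo) >= MAX_FIFO:
            # Evict the oldest entry, regardless of how often it was hit.
            del _fifo[next(iter(_fifo))]
        _fifo[key] = value           # promote into the fast front cache
        return value

The front cache answers repeated lookups with one dict probe and no mutation, which is the latency win; the LRU cache behind it keeps frequently used entries alive after they age out of the FIFO, which is the hit-rate win.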
New file:
@@ -0,0 +1 @@
+Use double caching for compiled RE patterns.
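The inline question about _cache.pop(key, None) comes down to how a plain dict can act as an LRU structure. Dicts preserve insertion order (a language guarantee since Python 3.7), so popping a key and re-inserting it moves it to the end, and next(iter(d)) always yields the oldest or least recently touched key. A small interpreter demonstration, independent of the re internals:

    d = {"a": 1, "b": 2, "c": 3}

    # Assigning to an existing key keeps its position ...
    d["a"] = 10
    print(list(d))              # ['a', 'b', 'c']

    # ... but pop() plus re-insertion moves it to the end.
    # This is the LRU "touch" that _compile() performs on a _cache hit.
    v = d.pop("a")
    d["a"] = v
    print(list(d))              # ['b', 'c', 'a']

    # Eviction then removes the first key, i.e. the least recently touched one.
    del d[next(iter(d))]
    print(list(d))              # ['c', 'a']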
Member: Can't this be defined before the try/except above? I also wonder if an "if key in _cache2: return _cache2[key]" would be more efficient than the try/except.
Author: All this would add a small but measurable overhead in the common case. I tested this when I wrote the current implementation.
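For anyone who wants to check the overhead claim themselves, a rough micro-benchmark of the two lookup styles on the hit path might look like the sketch below. The helper names and the loop count are arbitrary; absolute timings vary by machine and Python version.

    import timeit

    cache = {("key", 0): object()}   # one pre-populated entry, so every lookup is a hit
    key = ("key", 0)

    def lookup_try(cache=cache, key=key):
        # Current style in _compile(): optimistic indexing, exception only on a miss.
        try:
            return cache[key]
        except KeyError:
            return None

    def lookup_in(cache=cache, key=key):
        # Suggested alternative: membership test first, then a second lookup.
        if key in cache:
            return cache[key]
        return None

    for fn in (lookup_try, lookup_in):
        seconds = timeit.timeit(fn, number=1_000_000)
        print(f"{fn.__name__}: {seconds:.3f} s for 1,000,000 hits")

On a hit, the try/except form touches the hash table once while the membership-test form probes it twice; on a miss the comparison flips, but _compile() is overwhelmingly called with already-cached patterns, which matches the point about the common case.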