diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 0000000..b596fc6 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,52 @@ +name: integration + +on: [push, pull_request] + +jobs: + + checks: + runs-on: ubuntu-latest + strategy: + max-parallel: 8 + matrix: + check: [bluecheck, doc8, docs, flake8, isortcheck, mypy, pylint, rstcheck] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + pip install --upgrade pip + pip install tox + - name: Run checks with tox + run: | + tox -e ${{ matrix.check }} + + tests: + needs: checks + runs-on: ${{ matrix.os }} + strategy: + max-parallel: 8 + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: [3.8, 3.9, '3.10', 3.11] + + steps: + - name: Set up Python ${{ matrix.python-version }} x64 + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - uses: actions/checkout@v3 + + - name: Install tox + run: | + pip install --upgrade pip + pip install tox + + - name: Test with tox + run: tox -e py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..33b3a8f --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,30 @@ +name: release + +on: + push: + tags: + - v* + +jobs: + + upload: + runs-on: ubuntu-latest + permissions: + id-token: write + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install build + run: pip install build + + - name: Create build + run: python -m build + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 69c7ee7..67157b8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,14 +3,19 @@ # virutalenv directories /env*/ +/.venv*/ -# coverage files -.coverage - -# setup sdist, test and upload directories +# test files/directories +/.cache/ +.coverage* +.pytest_cache/ /.tox/ + +# setup and upload directories /build/ /dist/ /diskcache.egg-info/ +/docs/_build/ +# macOS metadata .DS_Store diff --git a/.pylintrc b/.pylintrc index 7e716ec..dc1490a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,197 +1,135 @@ -[MASTER] +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code +# run arbitrary code. 
+extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) extension-pkg-whitelist= -# Add files or directories to the blacklist. They should be base names, not -# paths. +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. ignore=CVS -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= # Python code to execute, usually for sys.path manipulation such as # pygtk.require(). #init-hook= -# Use multiple processes to speed up Pylint. +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. jobs=1 -# List of plugins (as comma separated values of python modules names) to load, +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, # usually to register additional checkers. load-plugins= # Pickle collected data for later comparisons. persistent=yes -# Specify a configuration file. -#rcfile= +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.11 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= # When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages +# user-friendly hints instead of false-positive error messages. suggestion-mode=yes # Allow loading of arbitrary C extensions. 
Extensions are imported into the # active Python interpreter and may run arbitrary code. unsafe-load-any-extension=no - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -disable=print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, - raw-checker-failed, - bad-inline-option, - locally-disabled, - locally-enabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - no-else-return, - inconsistent-return-statements, - not-callable - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - -# Set the output format. 
Available formats are text, parseable, colorized, json -# and msvs (visual studio).You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=6 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=optparse.Values,sys.exit +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= [BASIC] -# Naming style matching correct argument names +# Naming style matching correct argument names. argument-naming-style=snake_case # Regular expression matching correct argument names. Overrides argument- -# naming-style +# naming-style. If left empty, argument names will be checked with the set +# naming style. #argument-rgx= -# Naming style matching correct attribute names +# Naming style matching correct attribute names. attr-naming-style=snake_case # Regular expression matching correct attribute names. Overrides attr-naming- -# style +# style. If left empty, attribute names will be checked with the set naming +# style. #attr-rgx= -# Bad variable names which should always be refused, separated by a comma +# Bad variable names which should always be refused, separated by a comma. bad-names=foo, bar, baz, @@ -199,38 +137,54 @@ bad-names=foo, tutu, tata -# Naming style matching correct class attribute names +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. class-attribute-naming-style=any # Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. #class-attribute-rgx= -# Naming style matching correct class names +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. class-naming-style=PascalCase -# Regular expression matching correct class names. Overrides class-naming-style +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. #class-rgx= -# Naming style matching correct constant names +# Naming style matching correct constant names. const-naming-style=UPPER_CASE # Regular expression matching correct constant names. Overrides const-naming- -# style +# style. If left empty, constant names will be checked with the set naming +# style. #const-rgx= # Minimum line length for functions/classes that require docstrings, shorter # ones are exempt. docstring-min-length=-1 -# Naming style matching correct function names +# Naming style matching correct function names. function-naming-style=snake_case # Regular expression matching correct function names. Overrides function- -# naming-style +# naming-style. 
If left empty, function names will be checked with the set +# naming style. #function-rgx= -# Good variable names which should always be accepted, separated by a comma +# Good variable names which should always be accepted, separated by a comma. good-names=i, j, k, @@ -238,28 +192,33 @@ good-names=i, Run, _ -# Include a hint for the correct naming format with invalid-name +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. include-naming-hint=no -# Naming style matching correct inline iteration names +# Naming style matching correct inline iteration names. inlinevar-naming-style=any # Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. #inlinevar-rgx= -# Naming style matching correct method names +# Naming style matching correct method names. method-naming-style=snake_case # Regular expression matching correct method names. Overrides method-naming- -# style +# style. If left empty, method names will be checked with the set naming style. #method-rgx= -# Naming style matching correct module names +# Naming style matching correct module names. module-naming-style=snake_case # Regular expression matching correct module names. Overrides module-naming- -# style +# style. If left empty, module names will be checked with the set naming style. #module-rgx= # Colon-delimited sets of names that determine each other's naming style when @@ -272,16 +231,96 @@ no-docstring-rgx=^_ # List of decorators that produce properties, such as abc.abstractproperty. Add # to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. property-classes=abc.abstractproperty -# Naming style matching correct variable names +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. variable-naming-style=snake_case # Regular expression matching correct variable names. Overrides variable- -# naming-style +# naming-style. If left empty, variable names will be checked with the set +# naming style. #variable-rgx= +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. 
+valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=8 + +# Maximum number of attributes for a class (see R0902). +max-attributes=8 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=20 + +# Maximum number of locals for function / method body. +max-locals=30 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=30 + +# Maximum number of return / yield for function / method body. +max-returns=8 + +# Maximum number of statements in function / method body. +max-statements=60 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + [FORMAT] # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. @@ -290,7 +329,7 @@ expected-line-ending-format= # Regexp for a line that is allowed to be longer than the limit. ignore-long-lines=^\s*(# )??$ -# Number of spaces of indent required inside a hanging or continued line. +# Number of spaces of indent required inside a hanging or continued line. indent-after-paren=4 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 @@ -300,15 +339,8 @@ indent-string=' ' # Maximum number of characters on a single line. max-line-length=100 -# Maximum number of lines in a module -max-module-lines=2000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma, - dict-separator +# Maximum number of lines in a module. +max-module-lines=2500 # Allow the body of a class to be on the same line as the declaration if body # contains single statement. @@ -319,13 +351,104 @@ single-line-class-stmt=no single-line-if-stmt=no +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. 
+known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + [LOGGING] +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + # Logging modules to check that the string format arguments are in logging -# function parameter format +# function parameter format. logging-modules=logging +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + consider-using-f-string, + no-member, + no-else-return, + no-else-raise, + inconsistent-return-statements + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. @@ -333,42 +456,100 @@ notes=FIXME, XXX, TODO +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). 
+evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + [SIMILARITIES] -# Ignore comments when computing similarities. +# Comments are removed from the similarity computation ignore-comments=yes -# Ignore docstrings when computing similarities. +# Docstrings are removed from the similarity computation ignore-docstrings=yes -# Ignore imports when computing similarities. -ignore-imports=no +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes # Minimum lines number of a similarity. -min-similarity-lines=4 +min-similarity-lines=20 [SPELLING] -# Limits count of emitted suggestions for spelling mistakes +# Limits count of emitted suggestions for spelling mistakes. max-spelling-suggestions=4 -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work.. spelling-dict= +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + # List of comma separated words that should not be checked. spelling-ignore-words= -# A path to a file that contains private dictionary; one word per line. +# A path to a file that contains the private dictionary; one word per line. spelling-private-dict-file= -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. spelling-store-unknown-words=no +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + [TYPECHECK] # List of decorators that produce context managers, such as @@ -379,15 +560,11 @@ contextmanager-decorators=contextlib.contextmanager # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. -generated-members=eviction_policy, - statistics, - count, - size, - cull_limit +generated-members= -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). 
-ignore-mixin-members=yes +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes # This flag controls whether pylint should warn about no-member and similar # checks whenever an opaque object is returned when inferring. The inference @@ -397,16 +574,16 @@ ignore-mixin-members=yes # the rest of the inferred objects. ignore-on-opaque-inference=yes +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + # List of class names for which member attributes should not be checked (useful # for classes with dynamically set attributes). This supports the use of # qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace # Show a hint with possible names when a member name was not found. The aspect # of finding the hint is based on edit distance. @@ -420,27 +597,35 @@ missing-member-hint-distance=1 # showing a hint for a missing member. missing-member-max-choices=1 +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + [VARIABLES] # List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. +# you should avoid defining new builtins when possible. additional-builtins= # Tells whether unused global variables should be treated as a violation. allow-global-unused-variables=yes +# List of names allowed to shadow builtins +allowed-redefined-builtins= + # List of strings which can identify a callback function by name. A callback # name must start or end with one of those strings. callbacks=cb_, _cb -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ -# Argument names that match this expression will be ignored. Default to name -# with leading underscore +# Argument names that match this expression will be ignored. ignored-argument-names=_.*|^ignored_|^unused_ # Tells whether we should check for unused import in __init__ files. @@ -448,99 +633,4 @@ init-import=no # List of qualified module names which can have objects that can redefine # builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. 
-valid-metaclass-classmethod-first-arg=mcs - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=8 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - -# Maximum number of branch for function / method body -max-branches=20 - -# Maximum number of locals for function / method body -max-locals=30 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=25 - -# Maximum number of return / yield for function / method body -max-returns=8 - -# Maximum number of statements in function / method body -max-statements=60 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c72311a..0000000 --- a/.travis.yml +++ /dev/null @@ -1,11 +0,0 @@ -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - "pypy" -install: - - pip install -r requirements.txt -script: - - nosetests -v diff --git a/LICENSE b/LICENSE index ff2e17b..bb4cfb7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,13 +1,12 @@ -Copyright 2016-2018 Grant Jenks +Copyright 2016-2022 Grant Jenks -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in index 645a28c..0c73842 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include README.rst LICENSE requirements.txt +include README.rst LICENSE diff --git a/README.rst b/README.rst index f6cf18c..04abdc0 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ DiskCache: Disk Backed Cache `DiskCache`_ is an Apache2 licensed disk and file backed cache library, written in pure-Python, and compatible with Django. -The cloud-based computing of 2018 puts a premium on memory. Gigabytes of empty +The cloud-based computing of 2023 puts a premium on memory. Gigabytes of empty space is left on disks as processes vie for memory. Among these processes is Memcached (and sometimes Redis) which is used as a cache. Wouldn't it be nice to leverage empty disk space for caching? @@ -46,9 +46,24 @@ testing has 100% coverage with unit tests and hours of stress. Testimonials ------------ +`Daren Hasenkamp`_, Founder -- + + "It's a useful, simple API, just like I love about Redis. It has reduced + the amount of queries hitting my Elasticsearch cluster by over 25% for a + website that gets over a million users/day (100+ hits/second)." + +`Mathias Petermann`_, Senior Linux System Engineer -- + + "I implemented it into a wrapper for our Ansible lookup modules and we were + able to speed up some Ansible runs by almost 3 times. DiskCache is saving + us a ton of time." + Does your company or website use `DiskCache`_? Send us a `message `_ and let us know. +.. _`Daren Hasenkamp`: https://www.linkedin.com/in/daren-hasenkamp-93006438/ +.. _`Mathias Petermann`: https://www.linkedin.com/in/mathias-petermann-a8aa273b/ + Features -------- @@ -62,43 +77,79 @@ Features - Thread-safe and process-safe - Supports multiple eviction policies (LRU and LFU included) - Keys support "tag" metadata and eviction -- Developed on Python 2.7 -- Tested on CPython 2.7, 3.4, 3.5, 3.6 and PyPy +- Developed on Python 3.10 +- Tested on CPython 3.6, 3.7, 3.8, 3.9, 3.10 - Tested on Linux, Mac OS X, and Windows -- Tested using Travis CI and AppVeyor CI +- Tested using GitHub Actions -.. image:: https://api.travis-ci.org/grantjenks/python-diskcache.svg?branch=master - :target: http://www.grantjenks.com/docs/diskcache/ +.. image:: https://github.com/grantjenks/python-diskcache/workflows/integration/badge.svg + :target: https://github.com/grantjenks/python-diskcache/actions?query=workflow%3Aintegration -.. image:: https://ci.appveyor.com/api/projects/status/github/grantjenks/python-diskcache?branch=master&svg=true - :target: http://www.grantjenks.com/docs/diskcache/ +.. image:: https://github.com/grantjenks/python-diskcache/workflows/release/badge.svg + :target: https://github.com/grantjenks/python-diskcache/actions?query=workflow%3Arelease Quickstart ---------- -Installing DiskCache is simple with -`pip `_:: +Installing `DiskCache`_ is simple with `pip `_:: $ pip install diskcache You can access documentation in the interpreter with Python's built-in help function:: + >>> import diskcache + >>> help(diskcache) # doctest: +SKIP + +The core of `DiskCache`_ is three data types intended for caching. `Cache`_ +objects manage a SQLite database and filesystem directory to store key and +value pairs. 
`FanoutCache`_ provides a sharding layer to utilize multiple +caches and `DjangoCache`_ integrates that with `Django`_:: + >>> from diskcache import Cache, FanoutCache, DjangoCache - >>> help(Cache) - >>> help(FanoutCache) - >>> help(DjangoCache) + >>> help(Cache) # doctest: +SKIP + >>> help(FanoutCache) # doctest: +SKIP + >>> help(DjangoCache) # doctest: +SKIP + +Built atop the caching data types, are `Deque`_ and `Index`_ which work as a +cross-process, persistent replacements for Python's ``collections.deque`` and +``dict``. These implement the sequence and mapping container base classes:: + + >>> from diskcache import Deque, Index + >>> help(Deque) # doctest: +SKIP + >>> help(Index) # doctest: +SKIP + +Finally, a number of `recipes`_ for cross-process synchronization are provided +using an underlying cache. Features like memoization with cache stampede +prevention, cross-process locking, and cross-process throttling are available:: + + >>> from diskcache import memoize_stampede, Lock, throttle + >>> help(memoize_stampede) # doctest: +SKIP + >>> help(Lock) # doctest: +SKIP + >>> help(throttle) # doctest: +SKIP + +Python's docstrings are a quick way to get started but not intended as a +replacement for the `DiskCache Tutorial`_ and `DiskCache API Reference`_. + +.. _`Cache`: http://www.grantjenks.com/docs/diskcache/tutorial.html#cache +.. _`FanoutCache`: http://www.grantjenks.com/docs/diskcache/tutorial.html#fanoutcache +.. _`DjangoCache`: http://www.grantjenks.com/docs/diskcache/tutorial.html#djangocache +.. _`Django`: https://www.djangoproject.com/ +.. _`Deque`: http://www.grantjenks.com/docs/diskcache/tutorial.html#deque +.. _`Index`: http://www.grantjenks.com/docs/diskcache/tutorial.html#index +.. _`recipes`: http://www.grantjenks.com/docs/diskcache/tutorial.html#recipes User Guide ---------- For those wanting more details, this part of the documentation describes -introduction, benchmarks, development, and API. +tutorial, benchmarks, API, and development. * `DiskCache Tutorial`_ * `DiskCache Cache Benchmarks`_ * `DiskCache DjangoCache Benchmarks`_ * `Case Study: Web Crawler`_ +* `Case Study: Landing Page Caching`_ * `Talk: All Things Cached - SF Python 2017 Meetup`_ * `DiskCache API Reference`_ * `DiskCache Development`_ @@ -108,11 +159,220 @@ introduction, benchmarks, development, and API. .. _`DiskCache DjangoCache Benchmarks`: http://www.grantjenks.com/docs/diskcache/djangocache-benchmarks.html .. _`Talk: All Things Cached - SF Python 2017 Meetup`: http://www.grantjenks.com/docs/diskcache/sf-python-2017-meetup-talk.html .. _`Case Study: Web Crawler`: http://www.grantjenks.com/docs/diskcache/case-study-web-crawler.html +.. _`Case Study: Landing Page Caching`: http://www.grantjenks.com/docs/diskcache/case-study-landing-page-caching.html .. _`DiskCache API Reference`: http://www.grantjenks.com/docs/diskcache/api.html .. _`DiskCache Development`: http://www.grantjenks.com/docs/diskcache/development.html -Reference and Indices ---------------------- +Comparisons +----------- + +Comparisons to popular projects related to `DiskCache`_. + +Key-Value Stores +................ + +`DiskCache`_ is mostly a simple key-value store. Feature comparisons with four +other projects are shown in the tables below. + +* `dbm`_ is part of Python's standard library and implements a generic + interface to variants of the DBM database — dbm.gnu or dbm.ndbm. If none of + these modules is installed, the slow-but-simple dbm.dumb is used. 
+* `shelve`_ is part of Python's standard library and implements a “shelf” as a + persistent, dictionary-like object. The difference with “dbm” databases is + that the values can be anything that the pickle module can handle. +* `sqlitedict`_ is a lightweight wrapper around Python's sqlite3 database with + a simple, Pythonic dict-like interface and support for multi-thread + access. Keys are arbitrary strings, values arbitrary pickle-able objects. +* `pickleDB`_ is a lightweight and simple key-value store. It is built upon + Python's simplejson module and was inspired by Redis. It is licensed with the + BSD three-clause license. + +.. _`dbm`: https://docs.python.org/3/library/dbm.html +.. _`shelve`: https://docs.python.org/3/library/shelve.html +.. _`sqlitedict`: https://github.com/RaRe-Technologies/sqlitedict +.. _`pickleDB`: https://pythonhosted.org/pickleDB/ + +**Features** + +================ ============= ========= ========= ============ ============ +Feature diskcache dbm shelve sqlitedict pickleDB +================ ============= ========= ========= ============ ============ +Atomic? Always Maybe Maybe Maybe No +Persistent? Yes Yes Yes Yes Yes +Thread-safe? Yes No No Yes No +Process-safe? Yes No No Maybe No +Backend? SQLite DBM DBM SQLite File +Serialization? Customizable None Pickle Customizable JSON +Data Types? Mapping/Deque Mapping Mapping Mapping Mapping +Ordering? Insert/Sorted None None None None +Eviction? LRU/LFU/more None None None None +Vacuum? Automatic Maybe Maybe Manual Automatic +Transactions? Yes No No Maybe No +Multiprocessing? Yes No No No No +Forkable? Yes No No No No +Metadata? Yes No No No No +================ ============= ========= ========= ============ ============ + +**Quality** + +================ ============= ========= ========= ============ ============ +Project diskcache dbm shelve sqlitedict pickleDB +================ ============= ========= ========= ============ ============ +Tests? Yes Yes Yes Yes Yes +Coverage? Yes Yes Yes Yes No +Stress? Yes No No No No +CI Tests? Linux/Windows Yes Yes Linux No +Python? 2/3/PyPy All All 2/3 2/3 +License? Apache2 Python Python Apache2 3-Clause BSD +Docs? Extensive Summary Summary Readme Summary +Benchmarks? Yes No No No No +Sources? GitHub GitHub GitHub GitHub GitHub +Pure-Python? Yes Yes Yes Yes Yes +Server? No No No No No +Integrations? Django None None None None +================ ============= ========= ========= ============ ============ + +**Timings** + +These are rough measurements. See `DiskCache Cache Benchmarks`_ for more +rigorous data. + +================ ============= ========= ========= ============ ============ +Project diskcache dbm shelve sqlitedict pickleDB +================ ============= ========= ========= ============ ============ +get 25 µs 36 µs 41 µs 513 µs 92 µs +set 198 µs 900 µs 928 µs 697 µs 1,020 µs +delete 248 µs 740 µs 702 µs 1,717 µs 1,020 µs +================ ============= ========= ========= ============ ============ + +Caching Libraries +................. + +* `joblib.Memory`_ provides caching functions and works by explicitly saving + the inputs and outputs to files. It is designed to work with non-hashable and + potentially large input and output data types such as numpy arrays. +* `klepto`_ extends Python’s `lru_cache` to utilize different keymaps and + alternate caching algorithms, such as `lfu_cache` and `mru_cache`. Klepto + uses a simple dictionary-sytle interface for all caches and archives. + +.. _`klepto`: https://pypi.org/project/klepto/ +.. 
_`joblib.Memory`: https://joblib.readthedocs.io/en/latest/memory.html + +Data Structures +............... + +* `dict`_ is a mapping object that maps hashable keys to arbitrary + values. Mappings are mutable objects. There is currently only one standard + Python mapping type, the dictionary. +* `pandas`_ is a Python package providing fast, flexible, and expressive data + structures designed to make working with “relational” or “labeled” data both + easy and intuitive. +* `Sorted Containers`_ is an Apache2 licensed sorted collections library, + written in pure-Python, and fast as C-extensions. Sorted Containers + implements sorted list, sorted dictionary, and sorted set data types. + +.. _`dict`: https://docs.python.org/3/library/stdtypes.html#typesmapping +.. _`pandas`: https://pandas.pydata.org/ +.. _`Sorted Containers`: http://www.grantjenks.com/docs/sortedcontainers/ + +Pure-Python Databases +..................... + +* `ZODB`_ supports an isomorphic interface for database operations which means + there's little impact on your code to make objects persistent and there's no + database mapper that partially hides the datbase. +* `CodernityDB`_ is an open source, pure-Python, multi-platform, schema-less, + NoSQL database and includes an HTTP server version, and a Python client + library that aims to be 100% compatible with the embedded version. +* `TinyDB`_ is a tiny, document oriented database optimized for your + happiness. If you need a simple database with a clean API that just works + without lots of configuration, TinyDB might be the right choice for you. + +.. _`ZODB`: http://www.zodb.org/ +.. _`CodernityDB`: https://pypi.org/project/CodernityDB/ +.. _`TinyDB`: https://tinydb.readthedocs.io/ + +Object Relational Mappings (ORM) +................................ + +* `Django ORM`_ provides models that are the single, definitive source of + information about data and contains the essential fields and behaviors of the + stored data. Generally, each model maps to a single SQL database table. +* `SQLAlchemy`_ is the Python SQL toolkit and Object Relational Mapper that + gives application developers the full power and flexibility of SQL. It + provides a full suite of well known enterprise-level persistence patterns. +* `Peewee`_ is a simple and small ORM. It has few (but expressive) concepts, + making it easy to learn and intuitive to use. Peewee supports Sqlite, MySQL, + and PostgreSQL with tons of extensions. +* `SQLObject`_ is a popular Object Relational Manager for providing an object + interface to your database, with tables as classes, rows as instances, and + columns as attributes. +* `Pony ORM`_ is a Python ORM with beautiful query syntax. Use Python syntax + for interacting with the database. Pony translates such queries into SQL and + executes them in the database in the most efficient way. + +.. _`Django ORM`: https://docs.djangoproject.com/en/dev/topics/db/ +.. _`SQLAlchemy`: https://www.sqlalchemy.org/ +.. _`Peewee`: http://docs.peewee-orm.com/ +.. _`SQLObject`: http://sqlobject.org/ +.. _`Pony ORM`: https://ponyorm.com/ + +SQL Databases +............. + +* `SQLite`_ is part of Python's standard library and provides a lightweight + disk-based database that doesn’t require a separate server process and allows + accessing the database using a nonstandard variant of the SQL query language. +* `MySQL`_ is one of the world’s most popular open source databases and has + become a leading database choice for web-based applications. 
MySQL includes a + standardized database driver for Python platforms and development. +* `PostgreSQL`_ is a powerful, open source object-relational database system + with over 30 years of active development. Psycopg is the most popular + PostgreSQL adapter for the Python programming language. +* `Oracle DB`_ is a relational database management system (RDBMS) from the + Oracle Corporation. Originally developed in 1977, Oracle DB is one of the + most trusted and widely used enterprise relational database engines. +* `Microsoft SQL Server`_ is a relational database management system developed + by Microsoft. As a database server, it stores and retrieves data as requested + by other software applications. + +.. _`SQLite`: https://docs.python.org/3/library/sqlite3.html +.. _`MySQL`: https://dev.mysql.com/downloads/connector/python/ +.. _`PostgreSQL`: http://initd.org/psycopg/ +.. _`Oracle DB`: https://pypi.org/project/cx_Oracle/ +.. _`Microsoft SQL Server`: https://pypi.org/project/pyodbc/ + +Other Databases +............... + +* `Memcached`_ is free and open source, high-performance, distributed memory + object caching system, generic in nature, but intended for use in speeding up + dynamic web applications by alleviating database load. +* `Redis`_ is an open source, in-memory data structure store, used as a + database, cache and message broker. It supports data structures such as + strings, hashes, lists, sets, sorted sets with range queries, and more. +* `MongoDB`_ is a cross-platform document-oriented database program. Classified + as a NoSQL database program, MongoDB uses JSON-like documents with + schema. PyMongo is the recommended way to work with MongoDB from Python. +* `LMDB`_ is a lightning-fast, memory-mapped database. With memory-mapped + files, it has the read performance of a pure in-memory database while + retaining the persistence of standard disk-based databases. +* `BerkeleyDB`_ is a software library intended to provide a high-performance + embedded database for key/value data. Berkeley DB is a programmatic toolkit + that provides built-in database support for desktop and server applications. +* `LevelDB`_ is a fast key-value storage library written at Google that + provides an ordered mapping from string keys to string values. Data is stored + sorted by key and users can provide a custom comparison function. + +.. _`Memcached`: https://pypi.org/project/python-memcached/ +.. _`MongoDB`: https://api.mongodb.com/python/current/ +.. _`Redis`: https://redis.io/clients#python +.. _`LMDB`: https://lmdb.readthedocs.io/ +.. _`BerkeleyDB`: https://pypi.org/project/bsddb3/ +.. _`LevelDB`: https://plyvel.readthedocs.io/ + +Reference +--------- * `DiskCache Documentation`_ * `DiskCache at PyPI`_ @@ -124,21 +384,20 @@ Reference and Indices .. _`DiskCache at GitHub`: https://github.com/grantjenks/python-diskcache/ .. _`DiskCache Issue Tracker`: https://github.com/grantjenks/python-diskcache/issues/ -DiskCache License ------------------ +License +------- -Copyright 2016-2018 Grant Jenks +Copyright 2016-2023 Grant Jenks -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. 
You may obtain a copy of the +License at http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. .. _`DiskCache`: http://www.grantjenks.com/docs/diskcache/ diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 4afd5c8..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,22 +0,0 @@ -environment: - - matrix: - - - PYTHON: "C:\\Python27" - - PYTHON: "C:\\Python34" - - PYTHON: "C:\\Python35" - - PYTHON: "C:\\Python36" - - PYTHON: "C:\\Python27-x64" - - PYTHON: "C:\\Python34-x64" - - PYTHON: "C:\\Python35-x64" - - PYTHON: "C:\\Python36-x64" - -install: - - - "%PYTHON%\\python.exe -m pip install nose mock django==1.11.12" - -build: off - -test_script: - - - "%PYTHON%\\python.exe -m nose -v" diff --git a/diskcache/__init__.py b/diskcache/__init__.py index ca58f14..7757d66 100644 --- a/diskcache/__init__.py +++ b/diskcache/__init__.py @@ -1,36 +1,68 @@ -"DiskCache: disk and file backed cache." +""" +DiskCache API Reference +======================= -from .core import Cache, Disk, UnknownFileWarning, EmptyDirWarning, Timeout -from .core import DEFAULT_SETTINGS, ENOVAL, EVICTION_POLICY, UNKNOWN +The :doc:`tutorial` provides a helpful walkthrough of most methods. +""" + +from .core import ( + DEFAULT_SETTINGS, + ENOVAL, + EVICTION_POLICY, + UNKNOWN, + Cache, + Disk, + EmptyDirWarning, + JSONDisk, + Timeout, + UnknownFileWarning, +) from .fanout import FanoutCache from .persistent import Deque, Index +from .recipes import ( + Averager, + BoundedSemaphore, + Lock, + RLock, + barrier, + memoize_stampede, + throttle, +) __all__ = [ + 'Averager', + 'BoundedSemaphore', 'Cache', - 'Disk', - 'UnknownFileWarning', - 'EmptyDirWarning', - 'Timeout', 'DEFAULT_SETTINGS', + 'Deque', + 'Disk', 'ENOVAL', 'EVICTION_POLICY', - 'UNKNOWN', + 'EmptyDirWarning', 'FanoutCache', - 'Deque', 'Index', + 'JSONDisk', + 'Lock', + 'RLock', + 'Timeout', + 'UNKNOWN', + 'UnknownFileWarning', + 'barrier', + 'memoize_stampede', + 'throttle', ] try: - from .djangocache import DjangoCache # pylint: disable=wrong-import-position + from .djangocache import DjangoCache # noqa + __all__.append('DjangoCache') -except Exception: # pylint: disable=broad-except +except Exception: # pylint: disable=broad-except # pragma: no cover # Django not installed or not setup so ignore. pass - __title__ = 'diskcache' -__version__ = '3.0.6' -__build__ = 0x030006 +__version__ = '5.6.3' +__build__ = 0x050603 __author__ = 'Grant Jenks' __license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2016-2018 Grant Jenks' +__copyright__ = 'Copyright 2016-2023 Grant Jenks' diff --git a/diskcache/cli.py b/diskcache/cli.py index 44bffeb..6a39f60 100644 --- a/diskcache/cli.py +++ b/diskcache/cli.py @@ -1 +1 @@ -"Command line interface to disk cache." 
+"""Command line interface to disk cache.""" diff --git a/diskcache/core.py b/diskcache/core.py index e449636..7a3d23b 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -1,5 +1,4 @@ """Core disk and file backed cache API. - """ import codecs @@ -7,49 +6,35 @@ import errno import functools as ft import io +import json import os import os.path as op +import pickle import pickletools import sqlite3 import struct -import sys +import tempfile import threading import time import warnings import zlib -if sys.hexversion < 0x03000000: - import cPickle as pickle # pylint: disable=import-error - # ISSUE #25 Fix for http://bugs.python.org/issue10211 - from cStringIO import StringIO as BytesIO # pylint: disable=import-error - TextType = unicode # pylint: disable=invalid-name,undefined-variable - BytesType = str - INT_TYPES = int, long # pylint: disable=undefined-variable - range = xrange # pylint: disable=redefined-builtin,invalid-name,undefined-variable - io_open = io.open # pylint: disable=invalid-name -else: - import pickle - from io import BytesIO # pylint: disable=ungrouped-imports - TextType = str - BytesType = bytes - INT_TYPES = (int,) - io_open = open # pylint: disable=invalid-name - -try: - WindowsError -except NameError: - class WindowsError(Exception): - "Windows error place-holder on platforms without support." - pass + +def full_name(func): + """Return full name of `func` by adding the module and function name.""" + return func.__module__ + '.' + func.__qualname__ + class Constant(tuple): - "Pretty display of immutable constant." + """Pretty display of immutable constant.""" + def __new__(cls, name): return tuple.__new__(cls, (name,)) def __repr__(self): return '%s' % self[0] + DBNAME = 'cache.db' ENOVAL = Constant('ENOVAL') UNKNOWN = Constant('UNKNOWN') @@ -61,25 +46,25 @@ def __repr__(self): MODE_PICKLE = 4 DEFAULT_SETTINGS = { - u'statistics': 0, # False - u'tag_index': 0, # False - u'eviction_policy': u'least-recently-stored', - u'size_limit': 2 ** 30, # 1gb - u'cull_limit': 10, - u'sqlite_auto_vacuum': 1, # FULL - u'sqlite_cache_size': 2 ** 13, # 8,192 pages - u'sqlite_journal_mode': u'wal', - u'sqlite_mmap_size': 2 ** 26, # 64mb - u'sqlite_synchronous': 1, # NORMAL - u'disk_min_file_size': 2 ** 15, # 32kb - u'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, + 'statistics': 0, # False + 'tag_index': 0, # False + 'eviction_policy': 'least-recently-stored', + 'size_limit': 2**30, # 1gb + 'cull_limit': 10, + 'sqlite_auto_vacuum': 1, # FULL + 'sqlite_cache_size': 2**13, # 8,192 pages + 'sqlite_journal_mode': 'wal', + 'sqlite_mmap_size': 2**26, # 64mb + 'sqlite_synchronous': 1, # NORMAL + 'disk_min_file_size': 2**15, # 32kb + 'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, } METADATA = { - u'count': 0, - u'size': 0, - u'hits': 0, - u'misses': 0, + 'count': 0, + 'size': 0, + 'hits': 0, + 'misses': 0, } EVICTION_POLICY = { @@ -115,8 +100,9 @@ def __repr__(self): } -class Disk(object): - "Cache key and value serialization for SQLite database and files." +class Disk: + """Cache key and value serialization for SQLite database and files.""" + def __init__(self, directory, min_file_size=0, pickle_protocol=0): """Initialize disk instance. @@ -129,7 +115,6 @@ def __init__(self, directory, min_file_size=0, pickle_protocol=0): self.min_file_size = min_file_size self.pickle_protocol = pickle_protocol - def hash(self, key): """Compute portable hash for `key`. 
@@ -143,15 +128,14 @@ def hash(self, key): if type_disk_key is sqlite3.Binary: return zlib.adler32(disk_key) & mask - elif type_disk_key is TextType: - return zlib.adler32(disk_key.encode('utf-8')) & mask # pylint: disable=no-member - elif type_disk_key in INT_TYPES: + elif type_disk_key is str: + return zlib.adler32(disk_key.encode('utf-8')) & mask # noqa + elif type_disk_key is int: return disk_key % mask else: assert type_disk_key is float return zlib.adler32(struct.pack('!d', disk_key)) & mask - def put(self, key): """Convert `key` to fields key and raw for Cache table. @@ -159,22 +143,25 @@ def put(self, key): :return: (database key, raw boolean) pair """ - # pylint: disable=bad-continuation,unidiomatic-typecheck + # pylint: disable=unidiomatic-typecheck type_key = type(key) - if type_key is BytesType: + if type_key is bytes: return sqlite3.Binary(key), True - elif ((type_key is TextType) - or (type_key in INT_TYPES - and -9223372036854775808 <= key <= 9223372036854775807) - or (type_key is float)): + elif ( + (type_key is str) + or ( + type_key is int + and -9223372036854775808 <= key <= 9223372036854775807 + ) + or (type_key is float) + ): return key, True else: data = pickle.dumps(key, protocol=self.pickle_protocol) result = pickletools.optimize(data) return sqlite3.Binary(result), False - def get(self, key, raw): """Convert fields `key` and `raw` from Cache table to key. @@ -183,12 +170,11 @@ def get(self, key, raw): :return: corresponding Python key """ - # pylint: disable=no-self-use,unidiomatic-typecheck + # pylint: disable=unidiomatic-typecheck if raw: - return BytesType(key) if type(key) is sqlite3.Binary else key + return bytes(key) if type(key) is sqlite3.Binary else key else: - return pickle.load(BytesIO(key)) - + return pickle.load(io.BytesIO(key)) def store(self, value, read, key=UNKNOWN): """Convert `value` to fields size, mode, filename, and value for Cache @@ -204,39 +190,32 @@ def store(self, value, read, key=UNKNOWN): type_value = type(value) min_file_size = self.min_file_size - if ((type_value is TextType and len(value) < min_file_size) - or (type_value in INT_TYPES - and -9223372036854775808 <= value <= 9223372036854775807) - or (type_value is float)): + if ( + (type_value is str and len(value) < min_file_size) + or ( + type_value is int + and -9223372036854775808 <= value <= 9223372036854775807 + ) + or (type_value is float) + ): return 0, MODE_RAW, None, value - elif type_value is BytesType: + elif type_value is bytes: if len(value) < min_file_size: return 0, MODE_RAW, None, sqlite3.Binary(value) else: filename, full_path = self.filename(key, value) - - with open(full_path, 'wb') as writer: - writer.write(value) - + self._write(full_path, io.BytesIO(value), 'xb') return len(value), MODE_BINARY, filename, None - elif type_value is TextType: + elif type_value is str: filename, full_path = self.filename(key, value) - - with io_open(full_path, 'w', encoding='UTF-8') as writer: - writer.write(value) - + self._write(full_path, io.StringIO(value), 'x', 'UTF-8') size = op.getsize(full_path) return size, MODE_TEXT, filename, None elif read: - size = 0 - reader = ft.partial(value.read, 2 ** 22) + reader = ft.partial(value.read, 2**22) filename, full_path = self.filename(key, value) - - with open(full_path, 'wb') as writer: - for chunk in iter(reader, b''): - size += len(chunk) - writer.write(chunk) - + iterator = iter(reader, b'') + size = self._write(full_path, iterator, 'xb') return size, MODE_BINARY, filename, None else: result = pickle.dumps(value, 
protocol=self.pickle_protocol) @@ -245,12 +224,32 @@ def store(self, value, read, key=UNKNOWN): return 0, MODE_PICKLE, None, sqlite3.Binary(result) else: filename, full_path = self.filename(key, value) + self._write(full_path, io.BytesIO(result), 'xb') + return len(result), MODE_PICKLE, filename, None - with open(full_path, 'wb') as writer: - writer.write(result) + def _write(self, full_path, iterator, mode, encoding=None): + full_dir, _ = op.split(full_path) - return len(result), MODE_PICKLE, filename, None + for count in range(1, 11): + with cl.suppress(OSError): + os.makedirs(full_dir) + + try: + # Another cache may have deleted the directory before + # the file could be opened. + writer = open(full_path, mode, encoding=encoding) + except OSError: + if count == 10: + # Give up after 10 tries to open the file. + raise + continue + with writer: + size = 0 + for chunk in iterator: + size += len(chunk) + writer.write(chunk) + return size def fetch(self, mode, filename, value, read): """Convert fields `mode`, `filename`, and `value` from Cache table to @@ -261,11 +260,12 @@ def fetch(self, mode, filename, value, read): :param value: database value :param bool read: when True, return an open file handle :return: corresponding Python value + :raises: IOError if the value cannot be read """ - # pylint: disable=no-self-use,unidiomatic-typecheck + # pylint: disable=unidiomatic-typecheck,consider-using-with if mode == MODE_RAW: - return BytesType(value) if type(value) is sqlite3.Binary else value + return bytes(value) if type(value) is sqlite3.Binary else value elif mode == MODE_BINARY: if read: return open(op.join(self._directory, filename), 'rb') @@ -274,15 +274,14 @@ def fetch(self, mode, filename, value, read): return reader.read() elif mode == MODE_TEXT: full_path = op.join(self._directory, filename) - with io_open(full_path, 'r', encoding='UTF-8') as reader: + with open(full_path, 'r', encoding='UTF-8') as reader: return reader.read() elif mode == MODE_PICKLE: if value is None: with open(op.join(self._directory, filename), 'rb') as reader: return pickle.load(reader) else: - return pickle.load(BytesIO(value)) - + return pickle.load(io.BytesIO(value)) def filename(self, key=UNKNOWN, value=UNKNOWN): """Return filename and full-path tuple for file storage. @@ -306,60 +305,119 @@ def filename(self, key=UNKNOWN, value=UNKNOWN): hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8') sub_dir = op.join(hex_name[:2], hex_name[2:4]) name = hex_name[4:] + '.val' - directory = op.join(self._directory, sub_dir) - - try: - os.makedirs(directory) - except OSError as error: - if error.errno != errno.EEXIST: - raise - filename = op.join(sub_dir, name) full_path = op.join(self._directory, filename) return filename, full_path + def remove(self, file_path): + """Remove a file given by `file_path`. - def remove(self, filename): - """Remove a file given by `filename`. - - This method is cross-thread and cross-process safe. If an "error no - entry" occurs, it is suppressed. + This method is cross-thread and cross-process safe. If an OSError + occurs, it is suppressed. - :param str filename: relative path to file + :param str file_path: relative path to file """ - full_path = op.join(self._directory, filename) + full_path = op.join(self._directory, file_path) + full_dir, _ = op.split(full_path) - try: + # Suppress OSError that may occur if two caches attempt to delete the + # same file or directory at the same time. 
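To make the storage split concrete, a small sketch of how the `Disk` layer above divides values between inline SQLite storage and `.val` files; the 1 KB threshold is an arbitrary value passed through the `disk_min_file_size` setting, and `Cache.set`/`Cache.read` are used as defined later in this patch.

import io
from diskcache import Cache

cache = Cache(disk_min_file_size=1024)  # values >= 1 KB go to .val files

cache.set('small', b'x' * 10)           # stored inline in SQLite (MODE_RAW)
cache.set('large', b'y' * 4096)         # written to a file by Disk._write

# read=True stores directly from an open binary file-like object...
cache.set('stream', io.BytesIO(b'z' * 4096), read=True)

# ...and read() hands back an open file handle for file-backed values.
with cache.read('large') as handle:
    assert handle.read() == b'y' * 4096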
+ + with cl.suppress(OSError): os.remove(full_path) - except WindowsError: - pass - except OSError as error: - if error.errno != errno.ENOENT: - # ENOENT may occur if two caches attempt to delete the same - # file at the same time. - raise + + with cl.suppress(OSError): + os.removedirs(full_dir) + + +class JSONDisk(Disk): + """Cache key and value using JSON serialization with zlib compression.""" + + def __init__(self, directory, compress_level=1, **kwargs): + """Initialize JSON disk instance. + + Keys and values are compressed using the zlib library. The + `compress_level` is an integer from 0 to 9 controlling the level of + compression; 1 is fastest and produces the least compression, 9 is + slowest and produces the most compression, and 0 is no compression. + + :param str directory: directory path + :param int compress_level: zlib compression level (default 1) + :param kwargs: super class arguments + + """ + self.compress_level = compress_level + super().__init__(directory, **kwargs) + + def put(self, key): + json_bytes = json.dumps(key).encode('utf-8') + data = zlib.compress(json_bytes, self.compress_level) + return super().put(data) + + def get(self, key, raw): + data = super().get(key, raw) + return json.loads(zlib.decompress(data).decode('utf-8')) + + def store(self, value, read, key=UNKNOWN): + if not read: + json_bytes = json.dumps(value).encode('utf-8') + value = zlib.compress(json_bytes, self.compress_level) + return super().store(value, read, key=key) + + def fetch(self, mode, filename, value, read): + data = super().fetch(mode, filename, value, read) + if not read: + data = json.loads(zlib.decompress(data).decode('utf-8')) + return data class Timeout(Exception): - "Database timeout expired." - pass + """Database timeout expired.""" class UnknownFileWarning(UserWarning): - "Warning used by Cache.check for unknown files." - pass + """Warning used by Cache.check for unknown files.""" class EmptyDirWarning(UserWarning): - "Warning used by Cache.check for empty directories." - pass + """Warning used by Cache.check for empty directories.""" + + +def args_to_key(base, args, kwargs, typed, ignore): + """Create cache key out of function arguments. + :param tuple base: base of key + :param tuple args: function arguments + :param dict kwargs: function keyword arguments + :param bool typed: include types in cache key + :param set ignore: positional or keyword args to ignore + :return: cache key tuple -class Cache(object): - "Disk and file backed cache." - # pylint: disable=bad-continuation - def __init__(self, directory, timeout=60, disk=Disk, **settings): + """ + args = tuple(arg for index, arg in enumerate(args) if index not in ignore) + key = base + args + (None,) + + if kwargs: + kwargs = {key: val for key, val in kwargs.items() if key not in ignore} + sorted_items = sorted(kwargs.items()) + + for item in sorted_items: + key += item + + if typed: + key += tuple(type(arg) for arg in args) + + if kwargs: + key += tuple(type(value) for _, value in sorted_items) + + return key + + +class Cache: + """Disk and file backed cache.""" + + def __init__(self, directory=None, timeout=60, disk=Disk, **settings): """Initialize cache instance. 
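A short sketch of the new `JSONDisk` in use; it relies on the `disk_`-prefixed settings being forwarded to the `Disk` subclass constructor, which the `Cache` initializer below does.

from diskcache import Cache, JSONDisk

# disk_compress_level reaches JSONDisk.__init__ as compress_level.
cache = Cache(disk=JSONDisk, disk_compress_level=6)

# Keys and values must be JSON-serializable; they are stored compressed.
cache['config'] = {'debug': True, 'retries': [1, 2, 3]}
assert cache['config'] == {'debug': True, 'retries': [1, 2, 3]}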
:param str directory: cache directory @@ -371,11 +429,18 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): try: assert issubclass(disk, Disk) except (TypeError, AssertionError): - raise ValueError('disk must subclass diskcache.Disk') + raise ValueError('disk must subclass diskcache.Disk') from None + + if directory is None: + directory = tempfile.mkdtemp(prefix='diskcache-') + directory = str(directory) + directory = op.expanduser(directory) + directory = op.expandvars(directory) self._directory = directory - self._timeout = 60 # Use 1 minute timeout for initialization. + self._timeout = 0 # Manually handle retries during initialization. self._local = threading.local() + self._txn_id = None if not op.isdir(directory): try: @@ -385,17 +450,17 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): raise EnvironmentError( error.errno, 'Cache directory "%s" does not exist' - ' and could not be created' % self._directory - ) + ' and could not be created' % self._directory, + ) from None - sql = self._sql + sql = self._sql_retry # Setup Settings table. try: - current_settings = dict(sql( - 'SELECT key, value FROM Settings' - ).fetchall()) + current_settings = dict( + sql('SELECT key, value FROM Settings').fetchall() + ) except sqlite3.OperationalError: current_settings = {} @@ -409,12 +474,11 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): # Chance to set pragmas before any tables are created. for key, value in sorted(sets.items()): - if not key.startswith('sqlite_'): - continue - - self.reset(key, value, update=False) + if key.startswith('sqlite_'): + self.reset(key, value, update=False) - sql('CREATE TABLE IF NOT EXISTS Settings (' + sql( + 'CREATE TABLE IF NOT EXISTS Settings (' ' key TEXT NOT NULL UNIQUE,' ' value)' ) @@ -422,7 +486,8 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): # Setup Disk object (must happen after settings initialized). kwargs = { - key[5:]: value for key, value in sets.items() + key[5:]: value + for key, value in sets.items() if key.startswith('disk_') } self._disk = disk(directory, **kwargs) @@ -439,11 +504,12 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): sql(query, (key, value)) self.reset(key) - (self._page_size,), = sql('PRAGMA page_size').fetchall() + ((self._page_size,),) = sql('PRAGMA page_size').fetchall() # Setup Cache table. - sql('CREATE TABLE IF NOT EXISTS Cache (' + sql( + 'CREATE TABLE IF NOT EXISTS Cache (' ' rowid INTEGER PRIMARY KEY,' ' key BLOB,' ' raw INTEGER,' @@ -458,12 +524,14 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): ' value BLOB)' ) - sql('CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' + sql( + 'CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' ' Cache(key, raw)' ) - sql('CREATE INDEX IF NOT EXISTS Cache_expire_time ON' - ' Cache (expire_time)' + sql( + 'CREATE INDEX IF NOT EXISTS Cache_expire_time ON' + ' Cache (expire_time) WHERE expire_time IS NOT NULL' ) query = EVICTION_POLICY[self.eviction_policy]['init'] @@ -473,32 +541,37 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): # Use triggers to keep Metadata updated. 
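For illustration, a sketch of tuning the settings this initializer persists in the Settings table; the particular values (a 256 MB size limit, culling disabled) are arbitrary, and `reset()` is the supported way to change a setting afterwards.

from diskcache import Cache, DEFAULT_SETTINGS

print(sorted(DEFAULT_SETTINGS))  # statistics, size_limit, cull_limit, ...

cache = Cache(
    size_limit=2**28,        # 256 MB instead of the 1 GB default
    cull_limit=0,            # disable automatic culling on writes
    sqlite_mmap_size=2**27,  # sqlite_* settings become per-connection pragmas
)

cache.reset('cull_limit', 10)  # settings can be changed later via reset()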
- sql('CREATE TRIGGER IF NOT EXISTS Settings_count_insert' + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_count_insert' ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' ' UPDATE Settings SET value = value + 1' ' WHERE key = "count"; END' ) - sql('CREATE TRIGGER IF NOT EXISTS Settings_count_delete' + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_count_delete' ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' ' UPDATE Settings SET value = value - 1' ' WHERE key = "count"; END' ) - sql('CREATE TRIGGER IF NOT EXISTS Settings_size_insert' + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_insert' ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' ' UPDATE Settings SET value = value + NEW.size' ' WHERE key = "size"; END' ) - sql('CREATE TRIGGER IF NOT EXISTS Settings_size_update' + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_update' ' AFTER UPDATE ON Cache FOR EACH ROW BEGIN' ' UPDATE Settings' ' SET value = value + NEW.size - OLD.size' ' WHERE key = "size"; END' ) - sql('CREATE TRIGGER IF NOT EXISTS Settings_size_delete' + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_delete' ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' ' UPDATE Settings SET value = value - OLD.size' ' WHERE key = "size"; END' @@ -517,27 +590,33 @@ def __init__(self, directory, timeout=60, disk=Disk, **settings): self._timeout = timeout self._sql # pylint: disable=pointless-statement - @property def directory(self): """Cache directory.""" return self._directory - @property def timeout(self): """SQLite connection timeout value in seconds.""" return self._timeout - @property def disk(self): """Disk used for serialization.""" return self._disk - @property - def _sql(self): + def _con(self): + # Check process ID to support process forking. If the process + # ID changes, close the connection and update the process ID. + + local_pid = getattr(self._local, 'pid', None) + pid = os.getpid() + + if local_pid != pid: + self.close() + self._local.pid = pid + con = getattr(self._local, 'con', None) if con is None: @@ -547,9 +626,10 @@ def _sql(self): isolation_level=None, ) - # Some SQLite pragmas work on a per-connection basis so query the - # Settings table and reset the pragmas. The Settings table may not - # exist so catch and ignore the OperationalError that may occur. + # Some SQLite pragmas work on a per-connection basis so + # query the Settings table and reset the pragmas. The + # Settings table may not exist so catch and ignore the + # OperationalError that may occur. try: select = 'SELECT key, value FROM Settings' @@ -561,48 +641,129 @@ def _sql(self): if key.startswith('sqlite_'): self.reset(key, value, update=False) - return con.execute + return con + @property + def _sql(self): + return self._con.execute + + @property + def _sql_retry(self): + sql = self._sql + + # 2018-11-01 GrantJ - Some SQLite builds/versions handle + # the SQLITE_BUSY return value and connection parameter + # "timeout" differently. For a more reliable duration, + # manually retry the statement for 60 seconds. Only used + # by statements which modify the database and do not use + # a transaction (like those in ``__init__`` or ``reset``). + # See Issue #85 for and tests/issue_85.py for more details. 
+ + def _execute_with_retry(statement, *args, **kwargs): + start = time.time() + while True: + try: + return sql(statement, *args, **kwargs) + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + + return _execute_with_retry + + @cl.contextmanager + def transact(self, retry=False): + """Context manager to perform a transaction by locking the cache. + + While the cache is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + + Transactions may be nested and may not be shared between threads. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + >>> cache = Cache() + >>> with cache.transact(): # Atomically increment two keys. + ... _ = cache.incr('total', 123.4) + ... _ = cache.incr('count', 1) + >>> with cache.transact(): # Atomically calculate average. + ... average = cache['total'] / cache['count'] + >>> average + 123.4 + + :param bool retry: retry if database timeout occurs (default False) + :return: context manager for use in `with` statement + :raises Timeout: if database timeout occurs + + """ + with self._transact(retry=retry): + yield @cl.contextmanager - def _transact(self, filename=None): + def _transact(self, retry=False, filename=None): sql = self._sql filenames = [] _disk_remove = self._disk.remove + tid = threading.get_ident() + txn_id = self._txn_id - try: - sql('BEGIN IMMEDIATE') - except sqlite3.OperationalError: - if filename is not None: - _disk_remove(filename) - raise Timeout + if tid == txn_id: + begin = False + else: + while True: + try: + sql('BEGIN IMMEDIATE') + begin = True + self._txn_id = tid + break + except sqlite3.OperationalError: + if retry: + continue + if filename is not None: + _disk_remove(filename) + raise Timeout from None try: yield sql, filenames.append except BaseException: - sql('ROLLBACK') + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('ROLLBACK') raise else: - sql('COMMIT') + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('COMMIT') for name in filenames: if name is not None: _disk_remove(name) - - def set(self, key, value, expire=None, read=False, tag=None): + def set(self, key, value, expire=None, read=False, tag=None, retry=False): """Set `key` and `value` item in cache. When `read` is `True`, `value` should be a file-like object opened for reading in binary mode. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key for item :param value: value for item :param float expire: seconds until item expires (default None, no expiry) :param bool read: read value as bytes from file (default False) :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) :return: True if item was set - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ now = time.time() @@ -632,7 +793,7 @@ def set(self, key, value, expire=None, read=False, tag=None): # INSERT OR REPLACE aka UPSERT is not used because the old filename may # need cleanup. - with self._transact(filename) as (sql, cleanup): + with self._transact(retry, filename) as (sql, cleanup): rows = sql( 'SELECT rowid, filename FROM Cache' ' WHERE key = ? 
AND raw = ?', @@ -640,7 +801,7 @@ def set(self, key, value, expire=None, read=False, tag=None): ).fetchall() if rows: - (rowid, old_filename), = rows + ((rowid, old_filename),) = rows cleanup(old_filename) self._row_update(rowid, now, columns) else: @@ -650,14 +811,22 @@ def set(self, key, value, expire=None, read=False, tag=None): return True + def __setitem__(self, key, value): + """Set corresponding `value` for `key` in cache. - __setitem__ = set + :param key: key for item + :param value: value for item + :return: corresponding value + :raises KeyError: if key is not found + """ + self.set(key, value, retry=True) def _row_update(self, rowid, now, columns): sql = self._sql expire_time, tag, size, mode, filename, value = columns - sql('UPDATE Cache SET' + sql( + 'UPDATE Cache SET' ' store_time = ?,' ' expire_time = ?,' ' access_time = ?,' @@ -667,11 +836,12 @@ def _row_update(self, rowid, now, columns): ' mode = ?,' ' filename = ?,' ' value = ?' - ' WHERE rowid = ?', ( - now, # store_time + ' WHERE rowid = ?', + ( + now, # store_time expire_time, - now, # access_time - 0, # access_count + now, # access_time + 0, # access_count tag, size, mode, @@ -681,20 +851,21 @@ def _row_update(self, rowid, now, columns): ), ) - def _row_insert(self, key, raw, now, columns): sql = self._sql expire_time, tag, size, mode, filename, value = columns - sql('INSERT INTO Cache(' + sql( + 'INSERT INTO Cache(' ' key, raw, store_time, expire_time, access_time,' ' access_count, tag, size, mode, filename, value' - ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', ( + ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( key, raw, - now, # store_time + now, # store_time expire_time, - now, # access_time - 0, # access_count + now, # access_time + 0, # access_count tag, size, mode, @@ -703,7 +874,6 @@ def _row_insert(self, key, raw, now, columns): ), ) - def _cull(self, now, sql, cleanup, limit=None): cull_limit = self.cull_limit if limit is None else limit @@ -722,13 +892,12 @@ def _cull(self, now, sql, cleanup, limit=None): rows = sql(select_expired, (now, cull_limit)).fetchall() if rows: - delete_expired = ( - 'DELETE FROM Cache WHERE rowid IN (%s)' - % (select_expired_template % 'rowid') + delete_expired = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( + select_expired_template % 'rowid' ) sql(delete_expired, (now, cull_limit)) - for filename, in rows: + for (filename,) in rows: cleanup(filename) cull_limit -= len(rows) @@ -747,17 +916,52 @@ def _cull(self, now, sql, cleanup, limit=None): rows = sql(select_filename, (cull_limit,)).fetchall() if rows: - delete = ( - 'DELETE FROM Cache WHERE rowid IN (%s)' - % (select_policy.format(fields='rowid', now=now)) + delete = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( + select_policy.format(fields='rowid', now=now) ) sql(delete, (cull_limit,)) - for filename, in rows: + for (filename,) in rows: cleanup(filename) + def touch(self, key, expire=None, retry=False): + """Touch `key` in cache and update `expire` time. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
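A brief sketch of the new `touch` method described above, refreshing an item's expiration without rewriting its value:

from diskcache import Cache

cache = Cache()
cache.set('token', 'abc123', expire=1)

# Extend the deadline before it lapses; expire=None would make it permanent.
assert cache.touch('token', expire=60) is True

# Touching an absent (or already expired) key reports False.
assert cache.touch('missing') is False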
+ + :param key: key for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool retry: retry if database timeout occurs (default False) + :return: True if key was touched + :raises Timeout: if database timeout occurs - def add(self, key, value, expire=None, read=False, tag=None): + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + + with self._transact(retry) as (sql, _): + rows = sql( + 'SELECT rowid, expire_time FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_expire_time),) = rows + + if old_expire_time is None or old_expire_time > now: + sql( + 'UPDATE Cache SET expire_time = ? WHERE rowid = ?', + (expire_time, rowid), + ) + return True + + return False + + def add(self, key, value, expire=None, read=False, tag=None, retry=False): """Add `key` and `value` item to cache. Similar to `set`, but only add to cache if key not present. @@ -768,14 +972,18 @@ def add(self, key, value, expire=None, read=False, tag=None): When `read` is `True`, `value` should be a file-like object opened for reading in binary mode. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key for item :param value: value for item :param float expire: seconds until the key expires (default None, no expiry) :param bool read: read value as bytes from file (default False) :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) :return: True if item was added - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ now = time.time() @@ -784,7 +992,7 @@ def add(self, key, value, expire=None, read=False, tag=None): size, mode, filename, db_value = self._disk.store(value, read, key=key) columns = (expire_time, tag, size, mode, filename, db_value) - with self._transact(filename) as (sql, cleanup): + with self._transact(retry, filename) as (sql, cleanup): rows = sql( 'SELECT rowid, filename, expire_time FROM Cache' ' WHERE key = ? AND raw = ?', @@ -792,7 +1000,7 @@ def add(self, key, value, expire=None, read=False, tag=None): ).fetchall() if rows: - (rowid, old_filename, old_expire_time), = rows + ((rowid, old_filename, old_expire_time),) = rows if old_expire_time is None or old_expire_time > now: cleanup(filename) @@ -807,8 +1015,7 @@ def add(self, key, value, expire=None, read=False, tag=None): return True - - def incr(self, key, delta=1, default=0): + def incr(self, key, delta=1, default=0, retry=False): """Increment value by delta for item with key. If key is missing and default is None then raise KeyError. Else if key @@ -821,12 +1028,16 @@ def incr(self, key, delta=1, default=0): machines with 64-bit pointer widths will support 64-bit signed integers. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key for item :param int delta: amount to increment (default 1) - :param int default: value if key is missing (default None) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) :return: new value for item :raises KeyError: if key is not found and default is None - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ now = time.time() @@ -836,7 +1047,7 @@ def incr(self, key, delta=1, default=0): ' WHERE key = ? AND raw = ?' 
) - with self._transact() as (sql, cleanup): + with self._transact(retry) as (sql, cleanup): rows = sql(select, (db_key, raw)).fetchall() if not rows: @@ -844,19 +1055,23 @@ def incr(self, key, delta=1, default=0): raise KeyError(key) value = default + delta - columns = (None, None) + self._disk.store(value, False, key=key) + columns = (None, None) + self._disk.store( + value, False, key=key + ) self._row_insert(db_key, raw, now, columns) self._cull(now, sql, cleanup) return value - (rowid, expire_time, filename, value), = rows + ((rowid, expire_time, filename, value),) = rows if expire_time is not None and expire_time < now: if default is None: raise KeyError(key) value = default + delta - columns = (None, None) + self._disk.store(value, False, key=key) + columns = (None, None) + self._disk.store( + value, False, key=key + ) self._row_update(rowid, now, columns) self._cull(now, sql, cleanup) cleanup(filename) @@ -875,8 +1090,7 @@ def incr(self, key, delta=1, default=0): return value - - def decr(self, key, delta=1, default=0): + def decr(self, key, delta=1, default=0, retry=False): """Decrement value by delta for item with key. If key is missing and default is None then raise KeyError. Else if key @@ -892,20 +1106,34 @@ def decr(self, key, delta=1, default=0): machines with 64-bit pointer widths will support 64-bit signed integers. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key for item :param int delta: amount to decrement (default 1) :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) :return: new value for item :raises KeyError: if key is not found and default is None - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ - return self.incr(key, -delta, default) - - - def get(self, key, default=None, read=False, expire_time=False, tag=False): + return self.incr(key, -delta, default, retry) + + def get( + self, + key, + default=None, + read=False, + expire_time=False, + tag=False, + retry=False, + ): """Retrieve value from cache. If `key` is missing, return `default`. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key for item :param default: value to return if key is missing (default None) :param bool read: if True, return file handle to value @@ -913,8 +1141,9 @@ def get(self, key, default=None, read=False, expire_time=False, tag=False): :param bool expire_time: if True, return expire_time in tuple (default False) :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) :return: value for item or default if key not found - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ db_key, raw = self._disk.put(key) @@ -938,16 +1167,13 @@ def get(self, key, default=None, read=False, expire_time=False, tag=False): if not rows: return default - (rowid, db_expire_time, db_tag, mode, filename, db_value), = rows + ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows try: value = self._disk.fetch(mode, filename, db_value, read) - except IOError as error: - if error.errno == errno.ENOENT: - # Key was deleted before we could retrieve result. - return default - else: - raise + except IOError: + # Key was deleted before we could retrieve result. + return default else: # Slow path, transaction required. 
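A small counter sketch using `incr`/`decr` as documented above; with the default of 0, a missing key simply starts the counter.

from diskcache import Cache

cache = Cache()

cache.incr('hits')                  # missing key starts from default=0
assert cache.get('hits') == 1

cache.incr('hits', delta=5)
assert cache.decr('hits', delta=2) == 4

# With default=None a missing key raises KeyError instead.
try:
    cache.incr('unknown', default=None)
except KeyError:
    pass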
cache_hit = ( @@ -957,7 +1183,7 @@ def get(self, key, default=None, read=False, expire_time=False, tag=False): 'UPDATE Settings SET value = value + 1 WHERE key = "misses"' ) - with self._transact() as (sql, _): + with self._transact(retry) as (sql, _): rows = sql(select, (db_key, raw, time.time())).fetchall() if not rows: @@ -965,19 +1191,17 @@ def get(self, key, default=None, read=False, expire_time=False, tag=False): sql(cache_miss) return default - (rowid, db_expire_time, db_tag, - mode, filename, db_value), = rows + ( + (rowid, db_expire_time, db_tag, mode, filename, db_value), + ) = rows # noqa: E127 try: value = self._disk.fetch(mode, filename, db_value, read) - except IOError as error: - if error.errno == errno.ENOENT: - # Key was deleted before we could retrieve result. - if self.statistics: - sql(cache_miss) - return default - else: - raise + except IOError: + # Key was deleted before we could retrieve result. + if self.statistics: + sql(cache_miss) + return default if self.statistics: sql(cache_hit) @@ -997,37 +1221,37 @@ def get(self, key, default=None, read=False, expire_time=False, tag=False): else: return value - def __getitem__(self, key): """Return corresponding value for `key` from cache. :param key: key matching item :return: corresponding value :raises KeyError: if key is not found - :raises Timeout: if database timeout expires """ - value = self.get(key, default=ENOVAL) + value = self.get(key, default=ENOVAL, retry=True) if value is ENOVAL: raise KeyError(key) return value - - def read(self, key): + def read(self, key, retry=False): """Return file handle value corresponding to `key` from cache. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) :return: file open for reading in binary mode :raises KeyError: if key is not found - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ - handle = self.get(key, default=ENOVAL, read=True) + handle = self.get(key, default=ENOVAL, read=True, retry=retry) if handle is ENOVAL: raise KeyError(key) return handle - def __contains__(self, key): """Return `True` if `key` matching item is found in cache. @@ -1047,21 +1271,26 @@ def __contains__(self, key): return bool(rows) - - def pop(self, key, default=None, expire_time=False, tag=False): + def pop( + self, key, default=None, expire_time=False, tag=False, retry=False + ): # noqa: E501 """Remove corresponding item for `key` from cache and return value. If `key` is missing, return `default`. Operation is atomic. Concurrent operations will be serialized. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
+ :param key: key for item :param default: value to return if key is missing (default None) :param bool expire_time: if True, return expire_time in tuple (default False) :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) :return: value for item or default if key not found - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ db_key, raw = self._disk.put(key) @@ -1076,24 +1305,21 @@ def pop(self, key, default=None, expire_time=False, tag=False): elif expire_time or tag: default = default, None - with self._transact() as (sql, _): + with self._transact(retry) as (sql, _): rows = sql(select, (db_key, raw, time.time())).fetchall() if not rows: return default - (rowid, db_expire_time, db_tag, mode, filename, db_value), = rows + ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) try: value = self._disk.fetch(mode, filename, db_value, False) - except IOError as error: - if error.errno == errno.ENOENT: - # Key was deleted before we could retrieve result. - return default - else: - raise + except IOError: + # Key was deleted before we could retrieve result. + return default finally: if filename is not None: self._disk.remove(filename) @@ -1107,18 +1333,21 @@ def pop(self, key, default=None, expire_time=False, tag=False): else: return value - - def __delitem__(self, key): + def __delitem__(self, key, retry=True): """Delete corresponding item for `key` from cache. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default `True`). + :param key: key matching item + :param bool retry: retry if database timeout occurs (default True) :raises KeyError: if key is not found - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ db_key, raw = self._disk.put(key) - with self._transact() as (sql, cleanup): + with self._transact(retry) as (sql, cleanup): rows = sql( 'SELECT rowid, filename FROM Cache' ' WHERE key = ? AND raw = ?' @@ -1129,31 +1358,42 @@ def __delitem__(self, key): if not rows: raise KeyError(key) - (rowid, filename), = rows + ((rowid, filename),) = rows sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) cleanup(filename) return True - - def delete(self, key): + def delete(self, key, retry=False): """Delete corresponding item for `key` from cache. Missing keys are ignored. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) :return: True if item was deleted - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ + # pylint: disable=unnecessary-dunder-call try: - return self.__delitem__(key) + return self.__delitem__(key, retry=retry) except KeyError: return False - - def push(self, value, prefix=None, side='back', expire=None, read=False, - tag=None): + def push( + self, + value, + prefix=None, + side='back', + expire=None, + read=False, + tag=None, + retry=False, + ): """Push `value` onto `side` of queue identified by `prefix` in cache. When prefix is None, integer keys are used. Otherwise, string keys are @@ -1167,10 +1407,12 @@ def push(self, value, prefix=None, side='back', expire=None, read=False, When `read` is `True`, `value` should be a file-like object opened for reading in binary mode. 
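For completeness, a sketch of `pop` and `delete` from the methods above; `pop` is atomic and both tolerate missing keys.

from diskcache import Cache

cache = Cache()
cache['color'] = 'blue'

assert cache.pop('color') == 'blue'        # removes and returns the value
assert cache.pop('color', default='?') == '?'

cache['shape'] = 'circle'
assert cache.delete('shape') is True       # True if something was deleted
assert cache.delete('shape') is False      # missing keys are ignored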
+ Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + See also `Cache.pull`. - >>> cache = Cache('/tmp/test') - >>> _ = cache.clear() + >>> cache = Cache() >>> print(cache.push('first value')) 500000000000000 >>> cache.get(500000000000000) @@ -1189,8 +1431,9 @@ def push(self, value, prefix=None, side='back', expire=None, read=False, (default None, no expiry) :param bool read: read value as bytes from file (default False) :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) :return: key for item in cache - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ if prefix is None: @@ -1212,14 +1455,14 @@ def push(self, value, prefix=None, side='back', expire=None, read=False, ' ORDER BY key %s LIMIT 1' ) % order[side] - with self._transact(filename) as (sql, cleanup): + with self._transact(retry, filename) as (sql, cleanup): rows = sql(select, (min_key, max_key, raw)).fetchall() if rows: - (key,), = rows + ((key,),) = rows if prefix is not None: - num = int(key[(key.rfind('-') + 1):]) + num = int(key[(key.rfind('-') + 1) :]) else: num = key @@ -1241,9 +1484,15 @@ def push(self, value, prefix=None, side='back', expire=None, read=False, return db_key - - def pull(self, prefix=None, default=(None, None), side='front', - expire_time=False, tag=False): + def pull( + self, + prefix=None, + default=(None, None), + side='front', + expire_time=False, + tag=False, + retry=False, + ): """Pull key and value item pair from `side` of queue in cache. When prefix is None, integer keys are used. Otherwise, string keys are @@ -1257,10 +1506,12 @@ def pull(self, prefix=None, default=(None, None), side='front', Operation is atomic. Concurrent operations will be serialized. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + See also `Cache.push` and `Cache.get`. 
- >>> cache = Cache('/tmp/test') - >>> _ = cache.clear() + >>> cache = Cache() >>> cache.pull() (None, None) >>> for letter in 'abc': @@ -1289,10 +1540,12 @@ def pull(self, prefix=None, default=(None, None), side='front', :param bool expire_time: if True, return expire_time in tuple (default False) :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) :return: key and value item pair or default if queue is empty - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ + # Caution: Nearly identical code exists in Cache.peek if prefix is None: min_key = 0 max_key = 999999999999999 @@ -1313,32 +1566,144 @@ def pull(self, prefix=None, default=(None, None), side='front', default = default, None while True: - with self._transact() as (sql, cleanup): - rows = sql(select, (min_key, max_key)).fetchall() + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() - if not rows: - return default + if not rows: + return default - (rowid, key, db_expire, db_tag, mode, name, db_value), = rows + ( + (rowid, key, db_expire, db_tag, mode, name, db_value), + ) = rows - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - if db_expire is not None and db_expire < time.time(): - cleanup(name) - else: - break + if db_expire is not None and db_expire < time.time(): + cleanup(name) + else: + break - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError as error: - if error.errno == errno.ENOENT: + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: # Key was deleted before we could retrieve result. - return default - else: - raise - finally: - if name is not None: - self._disk.remove(name) + continue + finally: + if name is not None: + self._disk.remove(name) + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def peek( + self, + prefix=None, + default=(None, None), + side='front', + expire_time=False, + tag=False, + retry=False, + ): + """Peek at key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to peeking at key and value item pairs from front of queue. + Set side to 'back' to pull from back of queue. Side must be one of + 'front' or 'back'. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull` and `Cache.push`. + + >>> cache = Cache() + >>> for letter in 'abc': + ... 
print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.peek() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> key, value = cache.peek(side='back') + >>> print(key) + 500000000000002 + >>> value + 'c' + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.pull + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + ( + (rowid, key, db_expire, db_tag, mode, name, db_value), + ) = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + continue + break if expire_time and tag: return (key, value), db_expire, db_tag @@ -1349,8 +1714,180 @@ def pull(self, prefix=None, default=(None, None), side='front', else: return key, value + def peekitem(self, last=True, expire_time=False, tag=False, retry=False): + """Peek at key and value item pair in cache based on iteration order. - def check(self, fix=False): + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + >>> cache = Cache() + >>> for num, letter in enumerate('abc'): + ... 
cache[letter] = num + >>> cache.peekitem() + ('c', 2) + >>> cache.peekitem(last=False) + ('a', 0) + + :param bool last: last item in iteration order (default True) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair + :raises KeyError: if cache is empty + :raises Timeout: if database timeout occurs + + """ + order = ('ASC', 'DESC') + select = ( + 'SELECT rowid, key, raw, expire_time, tag, mode, filename, value' + ' FROM Cache ORDER BY rowid %s LIMIT 1' + ) % order[last] + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select).fetchall() + + if not rows: + raise KeyError('dictionary is empty') + + ( + ( + rowid, + db_key, + raw, + db_expire, + db_tag, + mode, + name, + db_value, + ), + ) = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + key = self._disk.get(db_key, raw) + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + continue + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def memoize( + self, name=None, typed=False, expire=None, tag=None, ignore=() + ): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + When expire is set to zero, function results will not be set in the + cache. Cache lookups still occur, however. Read + :doc:`case-study-landing-page-caching` for example usage. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. + + The original underlying function is accessible through the __wrapped__ + attribute. This is useful for introspection, for bypassing the cache, + or for rewrapping the function with a different cache. + + >>> from diskcache import Cache + >>> cache = Cache() + >>> @cache.memoize(expire=1, tag='fib') + ... def fibonacci(number): + ... if number == 0: + ... return 0 + ... elif number == 1: + ... return 1 + ... else: + ... return fibonacci(number - 1) + fibonacci(number - 2) + >>> print(fibonacci(100)) + 354224848179261915075 + + An additional `__cache_key__` attribute can be used to generate the + cache key used for the given arguments. + + >>> key = fibonacci.__cache_key__(100) + >>> print(cache[key]) + 354224848179261915075 + + Remember to call memoize when decorating a callable. If you forget, + then a TypeError will occur. Note the lack of parenthenses after + memoize below: + + >>> @cache.memoize + ... def test(): + ... pass + Traceback (most recent call last): + ... 
+ TypeError: name cannot be callable + + :param cache: cache to store callable arguments and return values + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param float expire: seconds until arguments expire + (default None, no expiry) + :param str tag: text to associate with arguments (default None) + :param set ignore: positional or keyword args to ignore (default ()) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in DjangoCache.memoize + if callable(name): + raise TypeError('name cannot be callable') + + def decorator(func): + """Decorator created by memoize() for callable `func`.""" + base = (full_name(func),) if name is None else (name,) + + @ft.wraps(func) + def wrapper(*args, **kwargs): + """Wrapper for callable to cache arguments and return values.""" + key = wrapper.__cache_key__(*args, **kwargs) + result = self.get(key, default=ENOVAL, retry=True) + + if result is ENOVAL: + result = func(*args, **kwargs) + if expire is None or expire > 0: + self.set(key, result, expire, tag=tag, retry=True) + + return result + + def __cache_key__(*args, **kwargs): + """Make key for cache given function arguments.""" + return args_to_key(base, args, kwargs, typed, ignore) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator + + def check(self, fix=False, retry=False): """Check database and file system consistency. Intended for use in testing and post-mortem error analysis. @@ -1361,9 +1898,13 @@ def check(self, fix=False): held for a long time. For example, local benchmarking shows that a cache with 1,000 file references takes ~60ms to check. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param bool fix: correct inconsistencies + :param bool retry: retry if database timeout occurs (default False) :return: list of warnings - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ # pylint: disable=access-member-before-definition,W0201 @@ -1374,14 +1915,14 @@ def check(self, fix=False): rows = sql('PRAGMA integrity_check').fetchall() - if len(rows) != 1 or rows[0][0] != u'ok': - for message, in rows: + if len(rows) != 1 or rows[0][0] != 'ok': + for (message,) in rows: warnings.warn(message) if fix: sql('VACUUM') - with self._transact() as (sql, _): + with self._transact(retry) as (sql, _): # Check Cache.filename against file system. @@ -1406,7 +1947,8 @@ def check(self, fix=False): warnings.warn(message % args) if fix: - sql('UPDATE Cache SET size = ?' + sql( + 'UPDATE Cache SET size = ?' ' WHERE rowid = ?', (real_size, rowid), ) @@ -1447,14 +1989,15 @@ def check(self, fix=False): # Check Settings.count against count of Cache rows. self.reset('count') - (count,), = sql('SELECT COUNT(key) FROM Cache').fetchall() + ((count,),) = sql('SELECT COUNT(key) FROM Cache').fetchall() if self.count != count: message = 'Settings.count != COUNT(Cache.key); %d != %d' warnings.warn(message % (self.count, count)) if fix: - sql('UPDATE Settings SET value = ? WHERE key = ?', + sql( + 'UPDATE Settings SET value = ? 
WHERE key = ?', (count, 'count'), ) @@ -1462,45 +2005,46 @@ def check(self, fix=False): self.reset('size') select_size = 'SELECT COALESCE(SUM(size), 0) FROM Cache' - (size,), = sql(select_size).fetchall() + ((size,),) = sql(select_size).fetchall() if self.size != size: message = 'Settings.size != SUM(Cache.size); %d != %d' warnings.warn(message % (self.size, size)) if fix: - sql('UPDATE Settings SET value = ? WHERE key =?', + sql( + 'UPDATE Settings SET value = ? WHERE key =?', (size, 'size'), ) return warns - def create_tag_index(self): """Create tag index on cache database. It is better to initialize cache with `tag_index=True` than use this. - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ sql = self._sql - sql('CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid)') + sql( + 'CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid) ' + 'WHERE tag IS NOT NULL' + ) self.reset('tag_index', 1) - def drop_tag_index(self): """Drop tag index on cache database. - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ sql = self._sql sql('DROP INDEX IF EXISTS Cache_tag_rowid') self.reset('tag_index', 0) - - def evict(self, tag): + def evict(self, tag, retry=False): """Remove items with matching `tag` from cache. Removing items is an iterative process. In each iteration, a subset of @@ -1510,9 +2054,13 @@ def evict(self, tag): `args` attribute will be the number of items removed before the exception occurred. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param str tag: tag identifying items + :param bool retry: retry if database timeout occurs (default False) :return: count of rows removed - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ select = ( @@ -1521,10 +2069,9 @@ def evict(self, tag): ' ORDER BY rowid LIMIT ?' ) args = [tag, 0, 100] - return self._select_delete(select, args, arg_index=1) + return self._select_delete(select, args, arg_index=1, retry=retry) - - def expire(self, now=None): + def expire(self, now=None, retry=False): """Remove expired items from cache. Removing items is an iterative process. In each iteration, a subset of @@ -1534,9 +2081,13 @@ def expire(self, now=None): `args` attribute will be the number of items removed before the exception occurred. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + :param float now: current time (default None, ``time.time()`` used) + :param bool retry: retry if database timeout occurs (default False) :return: count of items removed - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ select = ( @@ -1545,10 +2096,9 @@ def expire(self, now=None): ' ORDER BY expire_time LIMIT ?' ) args = [0, now or time.time(), 100] - return self._select_delete(select, args, row_index=1) - + return self._select_delete(select, args, row_index=1, retry=retry) - def cull(self): + def cull(self, retry=False): """Cull items from cache until volume is less than size limit. Removing items is an iterative process. In each iteration, a subset of @@ -1558,8 +2108,12 @@ def cull(self): `args` attribute will be the number of items removed before the exception occurred. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
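To show tag-based eviction in practice, a sketch that creates the cache with `tag_index=True`, which `create_tag_index` above recommends over building the index later:

import time
from diskcache import Cache

cache = Cache(tag_index=True)

for num in range(10):
    cache.set(num, num, tag='batch-1')
cache.set('other', 'value', tag='batch-2')

assert cache.evict('batch-1') == 10   # count of rows removed

# expire() removes items whose expire_time has passed.
cache.set('temporary', 'value', expire=0.001)
time.sleep(0.01)
assert cache.expire() == 1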
+ + :param bool retry: retry if database timeout occurs (default False) :return: count of items removed - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ now = time.time() @@ -1573,13 +2127,13 @@ def cull(self): select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] if select_policy is None: - return + return 0 select_filename = select_policy.format(fields='filename', now=now) try: while self.volume() > self.size_limit: - with self._transact() as (sql, cleanup): + with self._transact(retry) as (sql, cleanup): rows = sql(select_filename, (10,)).fetchall() if not rows: @@ -1592,15 +2146,14 @@ def cull(self): ) sql(delete, (10,)) - for filename, in rows: + for (filename,) in rows: cleanup(filename) except Timeout: - raise Timeout(count) + raise Timeout(count) from None return count - - def clear(self): + def clear(self, retry=False): """Remove all items from cache. Removing items is an iterative process. In each iteration, a subset of @@ -1610,8 +2163,12 @@ def clear(self): `args` attribute will be the number of items removed before the exception occurred. + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) :return: count of rows removed - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ select = ( @@ -1620,16 +2177,17 @@ def clear(self): ' ORDER BY rowid LIMIT ?' ) args = [0, 100] - return self._select_delete(select, args) - + return self._select_delete(select, args, retry=retry) - def _select_delete(self, select, args, row_index=0, arg_index=0): + def _select_delete( + self, select, args, row_index=0, arg_index=0, retry=False + ): count = 0 delete = 'DELETE FROM Cache WHERE rowid IN (%s)' try: while True: - with self._transact() as (sql, cleanup): + with self._transact(retry) as (sql, cleanup): rows = sql(select, args).fetchall() if not rows: @@ -1643,16 +2201,14 @@ def _select_delete(self, select, args, row_index=0, arg_index=0): cleanup(row[-1]) except Timeout: - raise Timeout(count) + raise Timeout(count) from None return count - def iterkeys(self, reverse=False): """Iterate Cache keys in database sort order. - >>> cache = Cache('/tmp/diskcache') - >>> _ = cache.clear() + >>> cache = Cache() >>> for key in [4, 1, 3, 0, 2]: ... cache[key] = key >>> list(cache.iterkeys()) @@ -1692,7 +2248,7 @@ def iterkeys(self, reverse=False): row = sql(select).fetchall() if row: - (key, raw), = row + ((key, raw),) = row else: return @@ -1707,11 +2263,10 @@ def iterkeys(self, reverse=False): for key, raw in rows: yield _disk_get(key, raw) - def _iter(self, ascending=True): sql = self._sql rows = sql('SELECT MAX(rowid) FROM Cache').fetchall() - (max_rowid,), = rows + ((max_rowid,),) = rows yield # Signal ready. if max_rowid is None: @@ -1741,21 +2296,18 @@ def _iter(self, ascending=True): for rowid, key, raw in rows: yield _disk_get(key, raw) - def __iter__(self): - "Iterate keys in cache including expired items." + """Iterate keys in cache including expired items.""" iterator = self._iter() next(iterator) return iterator - def __reversed__(self): - "Reverse iterate keys in cache including expired items." + """Reverse iterate keys in cache including expired items.""" iterator = self._iter(ascending=False) next(iterator) return iterator - def stats(self, enable=True, reset=False): """Return cache statistics hits and misses. 
@@ -1775,22 +2327,18 @@ def stats(self, enable=True, reset=False): return result - def volume(self): """Return estimated total size of cache on disk. :return: size in bytes """ - (page_count,), = self._sql('PRAGMA page_count').fetchall() + ((page_count,),) = self._sql('PRAGMA page_count').fetchall() total_size = self._page_size * page_count + self.reset('size') return total_size - def close(self): - """Close database connection. - - """ + """Close database connection.""" con = getattr(self._local, 'con', None) if con is None: @@ -1803,28 +2351,25 @@ def close(self): except AttributeError: pass - def __enter__(self): + # Create connection in thread. + # pylint: disable=unused-variable + connection = self._con # noqa return self - def __exit__(self, *exception): self.close() - def __len__(self): - "Count of items in cache including expired items." + """Count of items in cache including expired items.""" return self.reset('count') - def __getstate__(self): return (self.directory, self.timeout, type(self.disk)) - def __setstate__(self, state): self.__init__(*state) - def reset(self, key, value=ENOVAL, update=True): """Reset `key` and `value` item from Settings table. @@ -1850,54 +2395,61 @@ def reset(self, key, value=ENOVAL, update=True): :param value: value for item (optional) :param bool update: update database Settings table (default True) :return: updated value for item - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ + sql = self._sql + sql_retry = self._sql_retry + if value is ENOVAL: select = 'SELECT value FROM Settings WHERE key = ?' - (value,), = self._sql(select, (key,)).fetchall() + ((value,),) = sql_retry(select, (key,)).fetchall() setattr(self, key, value) return value - else: - if update: - with self._transact() as (sql, _): - statement = 'UPDATE Settings SET value = ? WHERE key = ?' - sql(statement, (value, key)) - else: - sql = self._sql - - if key.startswith('sqlite_'): - # 2016-02-17 GrantJ - PRAGMA and autocommit_level=None - # don't always play nicely together. Retry setting the - # PRAGMA. I think some PRAGMA statements expect to - # immediately take an EXCLUSIVE lock on the database. I - # can't find any documentation for this but without the - # retry, stress will intermittently fail with multiple - # processes. + if update: + statement = 'UPDATE Settings SET value = ? WHERE key = ?' + sql_retry(statement, (value, key)) - pause = 0.001 - count = 60000 # 60 / 0.001 - error = sqlite3.OperationalError - pragma = key[7:] + if key.startswith('sqlite_'): + pragma = key[7:] - for _ in range(count): - try: - args = pragma, value - sql('PRAGMA %s = %s' % args).fetchall() - except sqlite3.OperationalError as exc: - error = exc - time.sleep(pause) - else: - break - else: - raise error + # 2016-02-17 GrantJ - PRAGMA and isolation_level=None + # don't always play nicely together. Retry setting the + # PRAGMA. I think some PRAGMA statements expect to + # immediately take an EXCLUSIVE lock on the database. I + # can't find any documentation for this but without the + # retry, stress will intermittently fail with multiple + # processes. - del error + # 2018-11-05 GrantJ - Avoid setting pragma values that + # are already set. Pragma settings like auto_vacuum and + # journal_mode can take a long time or may not work after + # tables have been created. 
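The retry loop below applies those PRAGMA updates. As a usage sketch of `reset` itself (the settings keys are the stock ones; the values are illustrative, not recommendations):

    from diskcache import Cache

    cache = Cache()
    print(cache.reset('size_limit'))    # read a value from the Settings table
    cache.reset('size_limit', 2 ** 30)  # update a plain setting
    # 'sqlite_*' keys also re-apply the PRAGMA on the open connection,
    # retrying while SQLite reports 'database is locked'.
    cache.reset('sqlite_mmap_size', 2 ** 26)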
- elif key.startswith('disk_'): - attr = key[5:] - setattr(self._disk, attr, value) + start = time.time() + while True: + try: + try: + ((old_value,),) = sql( + 'PRAGMA %s' % (pragma) + ).fetchall() + update = old_value != value + except ValueError: + update = True + if update: + sql('PRAGMA %s = %s' % (pragma, value)).fetchall() + break + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + elif key.startswith('disk_'): + attr = key[5:] + setattr(self._disk, attr, value) - setattr(self, key, value) - return value + setattr(self, key, value) + return value diff --git a/diskcache/djangocache.py b/diskcache/djangocache.py index 3ec07da..5dc8ce2 100644 --- a/diskcache/djangocache.py +++ b/diskcache/djangocache.py @@ -1,18 +1,22 @@ -"Django-compatible disk and file backed cache." +"""Django-compatible disk and file backed cache.""" + +from functools import wraps from django.core.cache.backends.base import BaseCache try: from django.core.cache.backends.base import DEFAULT_TIMEOUT -except ImportError: +except ImportError: # pragma: no cover # For older versions of Django simply use 300 seconds. DEFAULT_TIMEOUT = 300 +from .core import ENOVAL, args_to_key, full_name from .fanout import FanoutCache class DjangoCache(BaseCache): - "Django-compatible disk and file backed cache." + """Django-compatible disk and file backed cache.""" + def __init__(self, directory, params): """Initialize DjangoCache instance. @@ -20,30 +24,35 @@ def __init__(self, directory, params): :param dict params: cache parameters """ - super(DjangoCache, self).__init__(params) + super().__init__(params) shards = params.get('SHARDS', 8) timeout = params.get('DATABASE_TIMEOUT', 0.010) options = params.get('OPTIONS', {}) - self._directory = directory self._cache = FanoutCache(directory, shards, timeout, **options) - self.memoize = self._cache.memoize - @property def directory(self): """Cache directory.""" - return self._directory + return self._cache.directory + def cache(self, name): + """Return Cache with given `name` in subdirectory. + + :param str name: subdirectory name for Cache + :return: Cache with given name + + """ + return self._cache.cache(name) - def deque(self, name): + def deque(self, name, maxlen=None): """Return Deque with given `name` in subdirectory. :param str name: subdirectory name for Deque + :param maxlen: max length (default None, no max) :return: Deque with given name """ - return self._cache.deque(name) - + return self._cache.deque(name, maxlen=maxlen) def index(self, name): """Return Index with given `name` in subdirectory. @@ -54,9 +63,16 @@ def index(self, name): """ return self._cache.index(name) - - def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, - read=False, tag=None, retry=True): + def add( + self, + key, + value, + timeout=DEFAULT_TIMEOUT, + version=None, + read=False, + tag=None, + retry=True, + ): """Set a value in the cache if the key does not already exist. If timeout is given, that timeout will be used for the key; otherwise the default cache timeout will be used. 
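The parameters read in `DjangoCache.__init__` above come from Django's `CACHES` setting. A hedged configuration sketch (the location and size values are illustrative):

    # settings.py
    CACHES = {
        'default': {
            'BACKEND': 'diskcache.DjangoCache',
            'LOCATION': '/var/tmp/django-diskcache',
            'TIMEOUT': 300,             # default item timeout in seconds
            'SHARDS': 8,                # number of FanoutCache shards
            'DATABASE_TIMEOUT': 0.010,  # SQLite connection timeout in seconds
            'OPTIONS': {'size_limit': 2 ** 30},  # passed through to FanoutCache
        },
    }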
@@ -70,7 +86,7 @@ def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, :param int version: key version number (default None, cache parameter) :param bool read: read value as bytes from file (default False) :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout expires (default True) + :param bool retry: retry if database timeout occurs (default True) :return: True if item was added """ @@ -79,9 +95,16 @@ def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, timeout = self.get_backend_timeout(timeout=timeout) return self._cache.add(key, value, timeout, read, tag, retry) - - def get(self, key, default=None, version=None, read=False, - expire_time=False, tag=False, retry=False): + def get( + self, + key, + default=None, + version=None, + read=False, + expire_time=False, + tag=False, + retry=False, + ): """Fetch a given key from the cache. If the key does not exist, return default, which itself defaults to None. @@ -93,7 +116,7 @@ def get(self, key, default=None, version=None, read=False, :param float expire_time: if True, return expire_time in tuple (default False) :param tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: value for item if key is found else default """ @@ -101,7 +124,6 @@ def get(self, key, default=None, version=None, read=False, key = self.make_key(key, version=version) return self._cache.get(key, default, read, expire_time, tag, retry) - def read(self, key, version=None): """Return file handle corresponding to `key` from Cache. @@ -114,9 +136,16 @@ def read(self, key, version=None): key = self.make_key(key, version=version) return self._cache.read(key) - - def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, - read=False, tag=None, retry=True): + def set( + self, + key, + value, + timeout=DEFAULT_TIMEOUT, + version=None, + read=False, + tag=None, + retry=True, + ): """Set a value in the cache. If timeout is given, that timeout will be used for the key; otherwise the default cache timeout will be used. @@ -127,7 +156,7 @@ def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, :param int version: key version number (default None, cache parameter) :param bool read: read value as bytes from file (default False) :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout expires (default True) + :param bool retry: retry if database timeout occurs (default True) :return: True if item was set """ @@ -136,9 +165,32 @@ def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, timeout = self.get_backend_timeout(timeout=timeout) return self._cache.set(key, value, timeout, read, tag, retry) + def touch(self, key, timeout=DEFAULT_TIMEOUT, version=None, retry=True): + """Touch a key in the cache. If timeout is given, that timeout will be + used for the key; otherwise the default cache timeout will be used. 
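With that backend configured, the extra keyword arguments accepted by `add`, `get` and `set` above are available straight from Django's cache handle. A short sketch (the key, value and tag are made up):

    from django.core.cache import cache  # DjangoCache configured as 'default'

    cache.set('greeting', 'hello', timeout=60, tag='demo')
    value = cache.get('greeting')
    # Ask for the expire time and tag alongside the value.
    value, expire_time, tag = cache.get('greeting', expire_time=True, tag=True)
    added = cache.add('greeting', 'hi')  # False, the key already exists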
+ + :param key: key for item + :param float timeout: seconds until the item expires + (default 300 seconds) + :param int version: key version number (default None, cache parameter) + :param bool retry: retry if database timeout occurs (default True) + :return: True if key was touched - def pop(self, key, default=None, version=None, expire_time=False, - tag=False, retry=True): + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + timeout = self.get_backend_timeout(timeout=timeout) + return self._cache.touch(key, timeout, retry) + + def pop( + self, + key, + default=None, + version=None, + expire_time=False, + tag=False, + retry=True, + ): """Remove corresponding item for `key` from cache and return value. If `key` is missing, return `default`. @@ -151,27 +203,25 @@ def pop(self, key, default=None, version=None, expire_time=False, :param float expire_time: if True, return expire_time in tuple (default False) :param tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout expires (default True) + :param bool retry: retry if database timeout occurs (default True) :return: value for item if key is found else default """ key = self.make_key(key, version=version) return self._cache.pop(key, default, expire_time, tag, retry) - def delete(self, key, version=None, retry=True): """Delete a key from the cache, failing silently. :param key: key for item :param int version: key version number (default None, cache parameter) - :param bool retry: retry if database timeout expires (default True) + :param bool retry: retry if database timeout occurs (default True) :return: True if item was deleted """ # pylint: disable=arguments-differ key = self.make_key(key, version=version) - self._cache.delete(key, retry) - + return self._cache.delete(key, retry) def incr(self, key, delta=1, version=None, default=None, retry=True): """Increment value by delta for item with key. @@ -190,7 +240,7 @@ def incr(self, key, delta=1, version=None, default=None, retry=True): :param int delta: amount to increment (default 1) :param int version: key version number (default None, cache parameter) :param int default: value if key is missing (default None) - :param bool retry: retry if database timeout expires (default True) + :param bool retry: retry if database timeout occurs (default True) :return: new value for item on success else None :raises ValueError: if key is not found and default is None @@ -200,8 +250,7 @@ def incr(self, key, delta=1, version=None, default=None, retry=True): try: return self._cache.incr(key, delta, default, retry) except KeyError: - raise ValueError("Key '%s' not found" % key) - + raise ValueError("Key '%s' not found" % key) from None def decr(self, key, delta=1, version=None, default=None, retry=True): """Decrement value by delta for item with key. 
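Note that the Django wrappers above translate a missing key into ValueError, matching Django's convention, rather than the KeyError raised by the underlying cache. A short sketch (key names are illustrative):

    from django.core.cache import cache

    cache.set('hits', 0)
    cache.incr('hits')            # 1
    cache.incr('hits', delta=9)   # 10
    cache.decr('hits', delta=3)   # 7

    try:
        cache.incr('missing')
    except ValueError:
        pass  # key is absent and default is None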
@@ -223,7 +272,7 @@ def decr(self, key, delta=1, version=None, default=None, retry=True): :param int delta: amount to decrement (default 1) :param int version: key version number (default None, cache parameter) :param int default: value if key is missing (default None) - :param bool retry: retry if database timeout expires (default True) + :param bool retry: retry if database timeout occurs (default True) :return: new value for item on success else None :raises ValueError: if key is not found and default is None @@ -231,7 +280,6 @@ def decr(self, key, delta=1, version=None, default=None, retry=True): # pylint: disable=arguments-differ return self.incr(key, -delta, version, default, retry) - def has_key(self, key, version=None): """Returns True if the key is in the cache and has not expired. @@ -243,7 +291,6 @@ def has_key(self, key, version=None): key = self.make_key(key, version=version) return key in self._cache - def expire(self): """Remove expired items from cache. @@ -252,7 +299,6 @@ def expire(self): """ return self._cache.expire() - def stats(self, enable=True, reset=False): """Return cache statistics hits and misses. @@ -263,27 +309,24 @@ def stats(self, enable=True, reset=False): """ return self._cache.stats(enable=enable, reset=reset) - def create_tag_index(self): """Create tag index on cache database. - It is better to initialize cache with `tag_index=True` than use this. + Better to initialize cache with `tag_index=True` than use this. - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ self._cache.create_tag_index() - def drop_tag_index(self): """Drop tag index on cache database. - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ self._cache.drop_tag_index() - def evict(self, tag): """Remove items with matching `tag` from cache. @@ -293,7 +336,6 @@ def evict(self, tag): """ return self._cache.evict(tag) - def cull(self): """Cull items from cache until volume is less than size limit. @@ -302,18 +344,15 @@ def cull(self): """ return self._cache.cull() - def clear(self): - "Remove *all* values from the cache at once." + """Remove *all* values from the cache at once.""" return self._cache.clear() - def close(self, **kwargs): - "Close the cache connection." + """Close the cache connection.""" # pylint: disable=unused-argument self._cache.close() - def get_backend_timeout(self, timeout=DEFAULT_TIMEOUT): """Return seconds to expiration. @@ -327,3 +366,91 @@ def get_backend_timeout(self, timeout=DEFAULT_TIMEOUT): # ticket 21147 - avoid time.time() related precision issues timeout = -1 return None if timeout is None else timeout + + def memoize( + self, + name=None, + timeout=DEFAULT_TIMEOUT, + version=None, + typed=False, + tag=None, + ignore=(), + ): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + When timeout is set to zero, function results will not be set in the + cache. Cache lookups still occur, however. Read + :doc:`case-study-landing-page-caching` for example usage. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. + + The original underlying function is accessible through the __wrapped__ + attribute. 
This is useful for introspection, for bypassing the cache, + or for rewrapping the function with a different cache. + + An additional `__cache_key__` attribute can be used to generate the + cache key used for the given arguments. + + Remember to call memoize when decorating a callable. If you forget, + then a TypeError will occur. + + :param str name: name given for callable (default None, automatic) + :param float timeout: seconds until the item expires + (default 300 seconds) + :param int version: key version number (default None, cache parameter) + :param bool typed: cache different types separately (default False) + :param str tag: text to associate with arguments (default None) + :param set ignore: positional or keyword args to ignore (default ()) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in Cache.memoize + if callable(name): + raise TypeError('name cannot be callable') + + def decorator(func): + """Decorator created by memoize() for callable `func`.""" + base = (full_name(func),) if name is None else (name,) + + @wraps(func) + def wrapper(*args, **kwargs): + """Wrapper for callable to cache arguments and return values.""" + key = wrapper.__cache_key__(*args, **kwargs) + result = self.get(key, ENOVAL, version, retry=True) + + if result is ENOVAL: + result = func(*args, **kwargs) + valid_timeout = ( + timeout is None + or timeout == DEFAULT_TIMEOUT + or timeout > 0 + ) + if valid_timeout: + self.set( + key, + result, + timeout, + version, + tag=tag, + retry=True, + ) + + return result + + def __cache_key__(*args, **kwargs): + """Make key for cache given function arguments.""" + return args_to_key(base, args, kwargs, typed, ignore) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator diff --git a/diskcache/fanout.py b/diskcache/fanout.py index 565537a..9822ee4 100644 --- a/diskcache/fanout.py +++ b/diskcache/fanout.py @@ -1,19 +1,24 @@ -"Fanout cache automatically shards keys and values." +"""Fanout cache automatically shards keys and values.""" +import contextlib as cl +import functools import itertools as it +import operator import os.path as op import sqlite3 +import tempfile import time -from .core import ENOVAL, DEFAULT_SETTINGS, Cache, Disk, Timeout -from .memo import memoize +from .core import DEFAULT_SETTINGS, ENOVAL, Cache, Disk, Timeout from .persistent import Deque, Index -class FanoutCache(object): - "Cache that shards keys and values." - def __init__(self, directory, shards=8, timeout=0.010, disk=Disk, - **settings): +class FanoutCache: + """Cache that shards keys and values.""" + + def __init__( + self, directory=None, shards=8, timeout=0.010, disk=Disk, **settings + ): """Initialize cache instance. 
:param str directory: cache directory @@ -23,34 +28,76 @@ def __init__(self, directory, shards=8, timeout=0.010, disk=Disk, :param settings: any of `DEFAULT_SETTINGS` """ - self._directory = directory - self._count = shards + if directory is None: + directory = tempfile.mkdtemp(prefix='diskcache-') + directory = str(directory) + directory = op.expanduser(directory) + directory = op.expandvars(directory) + default_size_limit = DEFAULT_SETTINGS['size_limit'] size_limit = settings.pop('size_limit', default_size_limit) / shards + + self._count = shards + self._directory = directory + self._disk = disk self._shards = tuple( Cache( - op.join(directory, '%03d' % num), + directory=op.join(directory, '%03d' % num), timeout=timeout, disk=disk, size_limit=size_limit, - **settings + **settings, ) for num in range(shards) ) self._hash = self._shards[0].disk.hash + self._caches = {} self._deques = {} self._indexes = {} - @property def directory(self): """Cache directory.""" return self._directory - def __getattr__(self, name): + safe_names = {'timeout', 'disk'} + valid_name = name in DEFAULT_SETTINGS or name in safe_names + assert valid_name, 'cannot access {} in cache shard'.format(name) return getattr(self._shards[0], name) + @cl.contextmanager + def transact(self, retry=True): + """Context manager to perform a transaction by locking the cache. + + While the cache is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + + Transactions may be nested and may not be shared between threads. + + Blocks until transactions are held on all cache shards by retrying as + necessary. + + >>> cache = FanoutCache() + >>> with cache.transact(): # Atomically increment two keys. + ... _ = cache.incr('total', 123.4) + ... _ = cache.incr('count', 1) + >>> with cache.transact(): # Atomically calculate average. + ... average = cache['total'] / cache['count'] + >>> average + 123.4 + + :return: context manager for use in `with` statement + + """ + assert retry, 'retry must be True in FanoutCache' + with cl.ExitStack() as stack: + for shard in self._shards: + shard_transaction = shard.transact(retry=True) + stack.enter_context(shard_transaction) + yield def set(self, key, value, expire=None, read=False, tag=None, retry=False): """Set `key` and `value` item in cache. @@ -58,38 +105,58 @@ def set(self, key, value, expire=None, read=False, tag=None, retry=False): When `read` is `True`, `value` should be a file-like object opened for reading in binary mode. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). 
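A sketch of the fail-silent behaviour described above: unlike `Cache`, the fanout wrapper catches `Timeout` and returns a fallback value, while the mapping operators retry instead (key and value are illustrative):

    from diskcache import FanoutCache

    cache = FanoutCache(shards=4, timeout=0.010)
    ok = cache.set('key', 'value')              # False on database timeout
    ok = cache.set('key', 'value', retry=True)  # retries until it succeeds
    cache['key'] = 'value'                      # __setitem__ always retries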
+ :param key: key for item :param value: value for item :param float expire: seconds until the key expires (default None, no expiry) :param bool read: read value as raw bytes from file (default False) :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: True if item was set """ index = self._hash(key) % self._count - set_func = self._shards[index].set - - while True: - try: - return set_func(key, value, expire, read, tag) - except Timeout: - if retry: - continue - else: - return False - + shard = self._shards[index] + try: + return shard.set(key, value, expire, read, tag, retry) + except Timeout: + return False def __setitem__(self, key, value): """Set `key` and `value` item in cache. + Calls :func:`FanoutCache.set` internally with `retry` set to `True`. + :param key: key for item :param value: value for item """ - self.set(key, value, retry=True) + index = self._hash(key) % self._count + shard = self._shards[index] + shard[key] = value + def touch(self, key, expire=None, retry=False): + """Touch `key` in cache and update `expire` time. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param key: key for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool retry: retry if database timeout occurs (default False) + :return: True if key was touched + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.touch(key, expire, retry) + except Timeout: + return False def add(self, key, value, expire=None, read=False, tag=None, retry=False): """Add `key` and `value` item to cache. @@ -102,28 +169,25 @@ def add(self, key, value, expire=None, read=False, tag=None, retry=False): When `read` is `True`, `value` should be a file-like object opened for reading in binary mode. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + :param key: key for item :param value: value for item :param float expire: seconds until the key expires (default None, no expiry) :param bool read: read value as bytes from file (default False) :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: True if item was added """ index = self._hash(key) % self._count - add_func = self._shards[index].add - - while True: - try: - return add_func(key, value, expire, read, tag) - except Timeout: - if retry: - continue - else: - return False - + shard = self._shards[index] + try: + return shard.add(key, value, expire, read, tag, retry) + except Timeout: + return False def incr(self, key, delta=1, default=0, retry=False): """Increment value by delta for item with key. @@ -138,26 +202,23 @@ def incr(self, key, delta=1, default=0, retry=False): machines with 64-bit pointer widths will support 64-bit signed integers. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). 
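A small sketch of the `touch`, `add` and `incr` semantics documented above (key names and expiry values are illustrative):

    from diskcache import FanoutCache

    cache = FanoutCache()
    cache.add('token', 'abc', expire=60)  # True, key was absent
    cache.add('token', 'xyz')             # False, key already present
    cache.touch('token', expire=300)      # True, expiry pushed back
    cache.incr('visits', default=0)       # 1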
+ :param key: key for item :param int delta: amount to increment (default 1) :param int default: value if key is missing (default 0) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: new value for item on success else None :raises KeyError: if key is not found and default is None """ index = self._hash(key) % self._count - incr_func = self._shards[index].incr - - while True: - try: - return incr_func(key, delta, default) - except Timeout: - if retry: - continue - else: - return None - + shard = self._shards[index] + try: + return shard.incr(key, delta, default, retry) + except Timeout: + return None def decr(self, key, delta=1, default=0, retry=False): """Decrement value by delta for item with key. @@ -175,21 +236,38 @@ def decr(self, key, delta=1, default=0, retry=False): machines with 64-bit pointer widths will support 64-bit signed integers. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + :param key: key for item :param int delta: amount to decrement (default 1) :param int default: value if key is missing (default 0) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: new value for item on success else None :raises KeyError: if key is not found and default is None """ - return self.incr(key, -delta, default, retry) - - - def get(self, key, default=None, read=False, expire_time=False, tag=False, - retry=False): + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.decr(key, delta, default, retry) + except Timeout: + return None + + def get( + self, + key, + default=None, + read=False, + expire_time=False, + tag=False, + retry=False, + ): """Retrieve value from cache. If `key` is missing, return `default`. + If database timeout occurs then returns `default` unless `retry` is set + to `True` (default `False`). + :param key: key for item :param default: return value if key is missing (default None) :param bool read: if True, return file handle to value @@ -197,41 +275,30 @@ def get(self, key, default=None, read=False, expire_time=False, tag=False, :param float expire_time: if True, return expire_time in tuple (default False) :param tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: value for item if key is found else default """ index = self._hash(key) % self._count - get_func = self._shards[index].get - - while True: - try: - return get_func( - key, default=default, read=read, expire_time=expire_time, - tag=tag, - ) - except (Timeout, sqlite3.OperationalError): - if retry: - continue - else: - return default - + shard = self._shards[index] + try: + return shard.get(key, default, read, expire_time, tag, retry) + except (Timeout, sqlite3.OperationalError): + return default def __getitem__(self, key): """Return corresponding value for `key` from cache. + Calls :func:`FanoutCache.get` internally with `retry` set to `True`. 
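The `read` flag mentioned above trades a file handle for the decoded value. A sketch (the payload is illustrative):

    import io
    from diskcache import FanoutCache

    cache = FanoutCache()
    # Store the contents of a binary file-like object.
    cache.set('blob', io.BytesIO(b'x' * 1000), read=True)
    # Retrieve a file handle instead of reading the value into memory.
    with cache.get('blob', read=True) as reader:
        data = reader.read()
    assert data == b'x' * 1000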
+ :param key: key for item :return: value for item :raises KeyError: if key is not found """ - value = self.get(key, default=ENOVAL, retry=True) - - if value is ENOVAL: - raise KeyError(key) - - return value - + index = self._hash(key) % self._count + shard = self._shards[index] + return shard[key] def read(self, key): """Return file handle corresponding to `key` from cache. @@ -246,7 +313,6 @@ def read(self, key): raise KeyError(key) return handle - def __contains__(self, key): """Return `True` if `key` matching item is found in cache. @@ -255,83 +321,71 @@ def __contains__(self, key): """ index = self._hash(key) % self._count - return key in self._shards[index] + shard = self._shards[index] + return key in shard - - def pop(self, key, default=None, expire_time=False, tag=False, - retry=False): + def pop( + self, key, default=None, expire_time=False, tag=False, retry=False + ): # noqa: E501 """Remove corresponding item for `key` from cache and return value. If `key` is missing, return `default`. Operation is atomic. Concurrent operations will be serialized. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + :param key: key for item :param default: return value if key is missing (default None) :param float expire_time: if True, return expire_time in tuple (default False) :param tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: value for item if key is found else default """ index = self._hash(key) % self._count - pop_func = self._shards[index].pop - - while True: - try: - return pop_func( - key, default=default, expire_time=expire_time, tag=tag, - ) - except Timeout: - if retry: - continue - else: - return default - + shard = self._shards[index] + try: + return shard.pop(key, default, expire_time, tag, retry) + except Timeout: + return default def delete(self, key, retry=False): """Delete corresponding item for `key` from cache. Missing keys are ignored. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + :param key: key for item - :param bool retry: retry if database timeout expires (default False) + :param bool retry: retry if database timeout occurs (default False) :return: True if item was deleted """ index = self._hash(key) % self._count - del_func = self._shards[index].__delitem__ - - while True: - try: - return del_func(key) - except Timeout: - if retry: - continue - else: - return False - except KeyError: - return False - + shard = self._shards[index] + try: + return shard.delete(key, retry) + except Timeout: + return False def __delitem__(self, key): """Delete corresponding item for `key` from cache. + Calls :func:`FanoutCache.delete` internally with `retry` set to `True`. + :param key: key for item :raises KeyError: if key is not found """ - deleted = self.delete(key, retry=True) - - if not deleted: - raise KeyError(key) - - - memoize = memoize - + index = self._hash(key) % self._count + shard = self._shards[index] + del shard[key] - def check(self, fix=False): + def check(self, fix=False, retry=False): """Check database and file system consistency. Intended for use in testing and post-mortem error analysis. @@ -342,80 +396,94 @@ def check(self, fix=False): held for a long time. For example, local benchmarking shows that a cache with 1,000 file references takes ~60ms to check. 
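A short sketch of running the consistency check described above across all shards (on a healthy cache the warning list is empty):

    from diskcache import FanoutCache

    cache = FanoutCache()
    cache.set('key', 'value')
    warnings = cache.check(fix=True, retry=True)  # warnings merged across shards
    assert warnings == []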
+ If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + :param bool fix: correct inconsistencies + :param bool retry: retry if database timeout occurs (default False) :return: list of warnings - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ - return sum((shard.check(fix=fix) for shard in self._shards), []) - + warnings = (shard.check(fix, retry) for shard in self._shards) + return functools.reduce(operator.iadd, warnings, []) - def expire(self): + def expire(self, retry=False): """Remove expired items from cache. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param bool retry: retry if database timeout occurs (default False) :return: count of items removed """ - return self._remove('expire', args=(time.time(),)) - + return self._remove('expire', args=(time.time(),), retry=retry) def create_tag_index(self): """Create tag index on cache database. - It is better to initialize cache with `tag_index=True` than use this. + Better to initialize cache with `tag_index=True` than use this. - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ for shard in self._shards: shard.create_tag_index() - def drop_tag_index(self): """Drop tag index on cache database. - :raises Timeout: if database timeout expires + :raises Timeout: if database timeout occurs """ for shard in self._shards: shard.drop_tag_index() - - def evict(self, tag): + def evict(self, tag, retry=False): """Remove items with matching `tag` from cache. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + :param str tag: tag identifying items + :param bool retry: retry if database timeout occurs (default False) :return: count of items removed """ - return self._remove('evict', args=(tag,)) - + return self._remove('evict', args=(tag,), retry=retry) - def cull(self): + def cull(self, retry=False): """Cull items from cache until volume is less than size limit. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param bool retry: retry if database timeout occurs (default False) :return: count of items removed """ - return self._remove('cull') + return self._remove('cull', retry=retry) - - def clear(self): + def clear(self, retry=False): """Remove all items from cache. + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param bool retry: retry if database timeout occurs (default False) :return: count of items removed """ - return self._remove('clear') + return self._remove('clear', retry=retry) - - def _remove(self, name, args=()): + def _remove(self, name, args=(), retry=False): total = 0 for shard in self._shards: method = getattr(shard, name) while True: try: - count = method(*args) + count = method(*args, retry=retry) total += count except Timeout as timeout: total += timeout.args[0] @@ -423,7 +491,6 @@ def _remove(self, name, args=()): break return total - def stats(self, enable=True, reset=False): """Return cache statistics hits and misses. 
@@ -433,9 +500,9 @@ def stats(self, enable=True, reset=False): """ results = [shard.stats(enable, reset) for shard in self._shards] - return (sum(result[0] for result in results), - sum(result[1] for result in results)) - + total_hits = sum(hits for hits, _ in results) + total_misses = sum(misses for _, misses in results) + return total_hits, total_misses def volume(self): """Return estimated total size of cache on disk. @@ -445,48 +512,40 @@ def volume(self): """ return sum(shard.volume() for shard in self._shards) - def close(self): - "Close database connection." + """Close database connection.""" for shard in self._shards: shard.close() + self._caches.clear() self._deques.clear() self._indexes.clear() - def __enter__(self): return self - def __exit__(self, *exception): self.close() - def __getstate__(self): return (self._directory, self._count, self.timeout, type(self.disk)) - def __setstate__(self, state): self.__init__(*state) - def __iter__(self): - "Iterate keys in cache including expired items." - iterators = [iter(shard) for shard in self._shards] + """Iterate keys in cache including expired items.""" + iterators = (iter(shard) for shard in self._shards) return it.chain.from_iterable(iterators) - def __reversed__(self): - "Reverse iterate keys in cache including expired items." - iterators = [reversed(shard) for shard in self._shards] - return it.chain.from_iterable(reversed(iterators)) - + """Reverse iterate keys in cache including expired items.""" + iterators = (reversed(shard) for shard in reversed(self._shards)) + return it.chain.from_iterable(iterators) def __len__(self): - "Count of items in cache including expired items." + """Count of items in cache including expired items.""" return sum(len(shard) for shard in self._shards) - def reset(self, key, value=ENOVAL): """Reset `key` and `value` item from Settings table. @@ -503,7 +562,6 @@ def reset(self, key, value=ENOVAL): :param str key: Settings key for item :param value: value for item (optional) :return: updated value for item - :raises Timeout: if database timeout expires """ for shard in self._shards: @@ -516,13 +574,50 @@ def reset(self, key, value=ENOVAL): break return result + def cache(self, name, timeout=60, disk=None, **settings): + """Return Cache with given `name` in subdirectory. + + If disk is none (default), uses the fanout cache disk. + + >>> fanout_cache = FanoutCache() + >>> cache = fanout_cache.cache('test') + >>> cache.set('abc', 123) + True + >>> cache.get('abc') + 123 + >>> len(cache) + 1 + >>> cache.delete('abc') + True + + :param str name: subdirectory name for Cache + :param float timeout: SQLite connection timeout + :param disk: Disk type or subclass for serialization + :param settings: any of DEFAULT_SETTINGS + :return: Cache with given name + + """ + _caches = self._caches - def deque(self, name): + try: + return _caches[name] + except KeyError: + parts = name.split('/') + directory = op.join(self._directory, 'cache', *parts) + temp = Cache( + directory=directory, + timeout=timeout, + disk=self._disk if disk is None else Disk, + **settings, + ) + _caches[name] = temp + return temp + + def deque(self, name, maxlen=None): """Return Deque with given `name` in subdirectory. 
- >>> cache = FanoutCache('/tmp/diskcache/fanoutcache') + >>> cache = FanoutCache() >>> deque = cache.deque('test') - >>> deque.clear() >>> deque.extend('abc') >>> deque.popleft() 'a' @@ -532,6 +627,7 @@ def deque(self, name): 1 :param str name: subdirectory name for Deque + :param maxlen: max length (default None, no max) :return: Deque with given name """ @@ -542,17 +638,20 @@ def deque(self, name): except KeyError: parts = name.split('/') directory = op.join(self._directory, 'deque', *parts) - temp = Deque(directory=directory) - _deques[name] = temp - return temp - + cache = Cache( + directory=directory, + disk=self._disk, + eviction_policy='none', + ) + deque = Deque.fromcache(cache, maxlen=maxlen) + _deques[name] = deque + return deque def index(self, name): """Return Index with given `name` in subdirectory. - >>> cache = FanoutCache('/tmp/diskcache/fanoutcache') + >>> cache = FanoutCache() >>> index = cache.index('test') - >>> index.clear() >>> index['abc'] = 123 >>> index['def'] = 456 >>> index['ghi'] = 789 @@ -575,6 +674,14 @@ def index(self, name): except KeyError: parts = name.split('/') directory = op.join(self._directory, 'index', *parts) - temp = Index(directory) - _indexes[name] = temp - return temp + cache = Cache( + directory=directory, + disk=self._disk, + eviction_policy='none', + ) + index = Index.fromcache(cache) + _indexes[name] = index + return index + + +FanoutCache.memoize = Cache.memoize # type: ignore diff --git a/diskcache/memo.py b/diskcache/memo.py deleted file mode 100644 index 3a2243a..0000000 --- a/diskcache/memo.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Memoization utilities. - -""" - -from functools import wraps - -from .core import ENOVAL - -def memoize(cache, name=None, typed=False, expire=None, tag=None): - """Memoizing cache decorator. - - Decorator to wrap callable with memoizing function using cache. Repeated - calls with the same arguments will lookup result in cache and avoid - function evaluation. - - If name is set to None (default), the callable name will be determined - automatically. - - If typed is set to True, function arguments of different types will be - cached separately. For example, f(3) and f(3.0) will be treated as distinct - calls with distinct results. - - The original underlying function is accessible through the __wrapped__ - attribute. This is useful for introspection, for bypassing the cache, or - for rewrapping the function with a different cache. - - >>> from diskcache import FanoutCache - >>> cache = FanoutCache('/tmp/diskcache/fanoutcache') - >>> @cache.memoize(typed=True, expire=1, tag='fib') - ... def fibonacci(number): - ... if number == 0: - ... return 0 - ... elif number == 1: - ... return 1 - ... else: - ... return fibonacci(number - 1) + fibonacci(number - 2) - >>> print(sum(fibonacci(number=value) for value in range(100))) - 573147844013817084100 - - Remember to call memoize when decorating a callable. If you forget, then a - TypeError will occur. Note the lack of parenthenses after memoize below: - - >>> @cache.memoize - ... def test(): - ... pass - Traceback (most recent call last): - ... 
- TypeError: name cannot be callable - - :param cache: cache to store callable arguments and return values - :param str name: name given for callable (default None, automatic) - :param bool typed: cache different types separately (default False) - :param float expire: seconds until arguments expire - (default None, no expiry) - :param str tag: text to associate with arguments (default None) - :return: callable decorator - - """ - if callable(name): - raise TypeError('name cannot be callable') - - def decorator(function): - "Decorator created by memoize call for callable." - if name is None: - try: - reference = function.__qualname__ - except AttributeError: - reference = function.__name__ - - reference = function.__module__ + reference - else: - reference = name - - reference = (reference,) - - @wraps(function) - def wrapper(*args, **kwargs): - "Wrapper for callable to cache arguments and return values." - - key = reference + args - - if kwargs: - key += (ENOVAL,) - sorted_items = sorted(kwargs.items()) - - for item in sorted_items: - key += item - - if typed: - key += tuple(type(arg) for arg in args) - - if kwargs: - key += tuple(type(value) for _, value in sorted_items) - - result = cache.get(key, default=ENOVAL, retry=True) - - if result is ENOVAL: - result = function(*args, **kwargs) - cache.set(key, result, expire=expire, tag=tag, retry=True) - - return result - - return wrapper - - return decorator diff --git a/diskcache/persistent.py b/diskcache/persistent.py index 499c350..522bb74 100644 --- a/diskcache/persistent.py +++ b/diskcache/persistent.py @@ -1,27 +1,26 @@ """Persistent Data Types - """ import operator as op -import sys - -from collections import MutableMapping, OrderedDict, Sequence -from collections import KeysView, ValuesView, ItemsView -from itertools import islice +from collections import OrderedDict +from collections.abc import ( + ItemsView, + KeysView, + MutableMapping, + Sequence, + ValuesView, +) +from contextlib import contextmanager from shutil import rmtree -from tempfile import mkdtemp - -from .core import BytesType, Cache, ENOVAL, TextType, Timeout -if sys.hexversion < 0x03000000: - from itertools import izip as zip # pylint: disable=redefined-builtin,no-name-in-module,ungrouped-imports - range = xrange # pylint: disable=redefined-builtin,invalid-name,undefined-variable +from .core import ENOVAL, Cache def _make_compare(seq_op, doc): - "Make compare method with Sequence semantics." + """Make compare method with Sequence semantics.""" + def compare(self, that): - "Compare method for deque and sequence." + """Compare method for deque and sequence.""" if not isinstance(that, Sequence): return NotImplemented @@ -56,10 +55,7 @@ class Deque(Sequence): Items are serialized to disk. Deque may be initialized from directory path where items are stored. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque - Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque += range(5) >>> list(deque) [0, 1, 2, 3, 4] @@ -78,7 +74,8 @@ class Deque(Sequence): [3, 2, 1, 0, 0, -1, -2, -3] """ - def __init__(self, iterable=(), directory=None): + + def __init__(self, iterable=(), directory=None, maxlen=None): """Initialize deque instance. If directory is None then temporary directory created. 
The directory @@ -88,19 +85,18 @@ def __init__(self, iterable=(), directory=None): :param directory: deque directory (default None) """ - if directory is None: - directory = mkdtemp() self._cache = Cache(directory, eviction_policy='none') - self.extend(iterable) - + self._maxlen = float('inf') if maxlen is None else maxlen + self._extend(iterable) @classmethod - def fromcache(cls, cache, iterable=()): + def fromcache(cls, cache, iterable=(), maxlen=None): """Initialize deque using `cache`. - >>> cache = Cache('/tmp/diskcache/index') - >>> _ = cache.clear() + >>> cache = Cache() >>> deque = Deque.fromcache(cache, [5, 6, 7, 8]) + >>> deque.cache is cache + True >>> len(deque) 4 >>> 7 in deque @@ -116,80 +112,103 @@ def fromcache(cls, cache, iterable=()): # pylint: disable=no-member,protected-access self = cls.__new__(cls) self._cache = cache - self.extend(iterable) + self._maxlen = float('inf') if maxlen is None else maxlen + self._extend(iterable) return self + @property + def cache(self): + """Cache used by deque.""" + return self._cache @property def directory(self): - "Directory path where deque is stored." + """Directory path where deque is stored.""" return self._cache.directory + @property + def maxlen(self): + """Max length of the deque.""" + return self._maxlen + + @maxlen.setter + def maxlen(self, value): + """Set max length of the deque. - def _key(self, index): + Pops items from left while length greater than max. + + >>> deque = Deque() + >>> deque.extendleft('abcde') + >>> deque.maxlen = 3 + >>> list(deque) + ['c', 'd', 'e'] + + :param value: max length + + """ + self._maxlen = value + with self._cache.transact(retry=True): + while len(self._cache) > self._maxlen: + self._popleft() + + def _index(self, index, func): len_self = len(self) - if index < 0: - index += len_self - if index < 0: + if index >= 0: + if index >= len_self: raise IndexError('deque index out of range') - elif index >= len_self: - raise IndexError('deque index out of range') - diff = len_self - index - 1 - _cache_iterkeys = self._cache.iterkeys + for key in self._cache.iterkeys(): + if index == 0: + try: + return func(key) + except KeyError: + continue + index -= 1 + else: + if index < -len_self: + raise IndexError('deque index out of range') - try: - if index <= diff: - iter_keys = _cache_iterkeys() - key = next(islice(iter_keys, index, index + 1)) - else: - iter_keys = _cache_iterkeys(reverse=True) - key = next(islice(iter_keys, diff, diff + 1)) - except StopIteration: - raise IndexError('deque index out of range') + index += 1 - return key + for key in self._cache.iterkeys(reverse=True): + if index == 0: + try: + return func(key) + except KeyError: + continue + index += 1 + raise IndexError('deque index out of range') def __getitem__(self, index): """deque.__getitem__(index) <==> deque[index] Return corresponding item for `index` in deque. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + See also `Deque.peekleft` and `Deque.peek` for indexing deque at index + ``0`` or ``-1``. 
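A bounded-deque sketch using the `maxlen` behaviour described above (the numbers are illustrative):

    from diskcache import Deque

    deque = Deque(range(10), maxlen=5)  # keeps only the rightmost five items
    list(deque)                         # [5, 6, 7, 8, 9]
    deque.append(10)                    # 5 is dropped from the left
    deque.maxlen = 3                    # shrinking also pops from the left
    list(deque)                         # [8, 9, 10]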
+ + >>> deque = Deque() >>> deque.extend('abcde') - >>> deque[0] - 'a' - >>> deque[-1] - 'e' - >>> deque[2] - 'c' + >>> deque[1] + 'b' + >>> deque[-2] + 'd' :param int index: index of item :return: corresponding item :raises IndexError: if index out of range """ - _key = self._key - _cache = self._cache - - while True: - try: - key = _key(index) - return _cache[key] - except (KeyError, Timeout): - continue - + return self._index(index, self._cache.__getitem__) def __setitem__(self, index, value): """deque.__setitem__(index, value) <==> deque[index] = value Store `value` in deque at `index`. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque.extend([None] * 3) >>> deque[0] = 'a' >>> deque[1] = 'b' @@ -202,25 +221,18 @@ def __setitem__(self, index, value): :raises IndexError: if index out of range """ - _key = self._key - _cache = self._cache - while True: - try: - key = _key(index) - _cache[key] = value - return - except Timeout: - continue + def _set_value(key): + return self._cache.__setitem__(key, value) + self._index(index, _set_value) def __delitem__(self, index): """deque.__delitem__(index) <==> del deque[index] Delete item in deque at `index`. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque.extend([None] * 3) >>> del deque[0] >>> del deque[1] @@ -232,17 +244,7 @@ def __delitem__(self, index): :raises IndexError: if index out of range """ - _key = self._key - _cache = self._cache - - while True: - try: - key = _key(index) - del _cache[key] - return - except (KeyError, Timeout): - continue - + self._index(index, self._cache.__delitem__) def __repr__(self): """deque.__repr__() <==> repr(deque) @@ -253,7 +255,6 @@ def __repr__(self): name = type(self).__name__ return '{0}(directory={1!r})'.format(name, self.directory) - __eq__ = _make_compare(op.eq, 'equal to') __ne__ = _make_compare(op.ne, 'not equal to') __lt__ = _make_compare(op.lt, 'less than') @@ -261,17 +262,18 @@ def __repr__(self): __le__ = _make_compare(op.le, 'less than or equal to') __ge__ = _make_compare(op.ge, 'greater than or equal to') - def __iadd__(self, iterable): """deque.__iadd__(iterable) <==> deque += iterable Extend back side of deque with items from iterable. + :param iterable: iterable of items to append to deque + :return: deque with added items + """ - self.extend(iterable) + self._extend(iterable) return self - def __iter__(self): """deque.__iter__() <==> iter(deque) @@ -283,10 +285,9 @@ def __iter__(self): for key in _cache.iterkeys(): try: yield _cache[key] - except (KeyError, Timeout): + except KeyError: pass - def __len__(self): """deque.__len__() <==> len(deque) @@ -295,14 +296,12 @@ def __len__(self): """ return len(self._cache) - def __reversed__(self): """deque.__reversed__() <==> reversed(deque) Return iterator of deque from back to front. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque.extend('abcd') >>> iterator = reversed(deque) >>> next(iterator) @@ -316,23 +315,20 @@ def __reversed__(self): for key in _cache.iterkeys(reverse=True): try: yield _cache[key] - except (KeyError, Timeout): + except KeyError: pass - def __getstate__(self): - return self.directory - + return self.directory, self.maxlen def __setstate__(self, state): - self.__init__(directory=state) - + directory, maxlen = state + self.__init__(directory=directory, maxlen=maxlen) def append(self, value): """Add `value` to back of deque. 
- >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque.append('a') >>> deque.append('b') >>> deque.append('c') @@ -342,21 +338,17 @@ def append(self, value): :param value: value to add to back of deque """ - _cache_push = self._cache.push - - while True: - try: - _cache_push(value) - return - except Timeout: - continue + with self._cache.transact(retry=True): + self._cache.push(value, retry=True) + if len(self._cache) > self._maxlen: + self._popleft() + _append = append def appendleft(self, value): """Add `value` to front of deque. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque.appendleft('a') >>> deque.appendleft('b') >>> deque.appendleft('c') @@ -366,35 +358,37 @@ def appendleft(self, value): :param value: value to add to front of deque """ - _cache_push = self._cache.push - - while True: - try: - _cache_push(value, side='front') - return - except Timeout: - continue + with self._cache.transact(retry=True): + self._cache.push(value, side='front', retry=True) + if len(self._cache) > self._maxlen: + self._pop() + _appendleft = appendleft def clear(self): """Remove all elements from deque. + >>> deque = Deque('abc') + >>> len(deque) + 3 + >>> deque.clear() + >>> list(deque) + [] + """ - _cache_clear = self._cache.clear + self._cache.clear(retry=True) - while True: - try: - _cache_clear() - return - except Timeout: - continue + _clear = clear + def copy(self): + """Copy deque with same directory and max length.""" + TypeSelf = type(self) + return TypeSelf(directory=self.directory, maxlen=self.maxlen) def count(self, value): """Return number of occurrences of `value` in deque. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque += [num for num in range(1, 5) for _ in range(num)] >>> deque.count(0) 0 @@ -404,10 +398,10 @@ def count(self, value): 4 :param value: value to count in deque + :return: count of items equal to value in deque """ - return sum(1 for item in self if item == value) - + return sum(1 for item in self if value == item) def extend(self, iterable): """Extend back side of deque with values from `iterable`. @@ -416,14 +410,14 @@ def extend(self, iterable): """ for value in iterable: - self.append(value) + self._append(value) + _extend = extend def extendleft(self, iterable): """Extend front side of deque with value from `iterable`. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque.extendleft('abc') >>> list(deque) ['c', 'b', 'a'] @@ -432,16 +426,66 @@ def extendleft(self, iterable): """ for value in iterable: - self.appendleft(value) + self._appendleft(value) + + def peek(self): + """Peek at value at back of deque. + + Faster than indexing deque at -1. + + If deque is empty then raise IndexError. + + >>> deque = Deque() + >>> deque.peek() + Traceback (most recent call last): + ... + IndexError: peek from an empty deque + >>> deque += 'abc' + >>> deque.peek() + 'c' + + :return: value at back of deque + :raises IndexError: if deque is empty + + """ + default = None, ENOVAL + _, value = self._cache.peek(default=default, side='back', retry=True) + if value is ENOVAL: + raise IndexError('peek from an empty deque') + return value + + def peekleft(self): + """Peek at value at front of deque. + + Faster than indexing deque at 0. + If deque is empty then raise IndexError. + + >>> deque = Deque() + >>> deque.peekleft() + Traceback (most recent call last): + ... 
+ IndexError: peek from an empty deque + >>> deque += 'abc' + >>> deque.peekleft() + 'a' + + :return: value at front of deque + :raises IndexError: if deque is empty + + """ + default = None, ENOVAL + _, value = self._cache.peek(default=default, side='front', retry=True) + if value is ENOVAL: + raise IndexError('peek from an empty deque') + return value def pop(self): """Remove and return value at back of deque. If deque is empty then raise IndexError. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque += 'ab' >>> deque.pop() 'b' @@ -452,28 +496,22 @@ def pop(self): ... IndexError: pop from an empty deque + :return: value at back of deque :raises IndexError: if deque is empty """ - _cache_pull = self._cache.pull - - while True: - try: - default = None, ENOVAL - _, value = _cache_pull(default=default, side='back') - except Timeout: - continue - else: - if value is ENOVAL: - raise IndexError('pop from an empty deque') - return value + default = None, ENOVAL + _, value = self._cache.pull(default=default, side='back', retry=True) + if value is ENOVAL: + raise IndexError('pop from an empty deque') + return value + _pop = pop def popleft(self): """Remove and return value at front of deque. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque += 'ab' >>> deque.popleft() 'a' @@ -484,26 +522,22 @@ def popleft(self): ... IndexError: pop from an empty deque - """ - _cache_pull = self._cache.pull + :return: value at front of deque + :raises IndexError: if deque is empty - while True: - try: - default = None, ENOVAL - _, value = _cache_pull(default=default) - except Timeout: - continue - else: - if value is ENOVAL: - raise IndexError('pop from an empty deque') - return value + """ + default = None, ENOVAL + _, value = self._cache.pull(default=default, retry=True) + if value is ENOVAL: + raise IndexError('pop from an empty deque') + return value + _popleft = popleft def remove(self, value): """Remove first occurrence of `value` in deque. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque += 'aab' >>> deque.remove('a') >>> list(deque) @@ -524,57 +558,49 @@ def remove(self, value): for key in _cache.iterkeys(): try: - while True: - try: - item = _cache[key] - except Timeout: - continue - else: - break + item = _cache[key] except KeyError: continue else: if value == item: try: - while True: - try: - del _cache[key] - except Timeout: - continue - else: - return + del _cache[key] except KeyError: continue + return raise ValueError('deque.remove(value): value not in deque') - def reverse(self): """Reverse deque in place. + >>> deque = Deque() + >>> deque += 'abc' + >>> deque.reverse() + >>> list(deque) + ['c', 'b', 'a'] + """ # pylint: disable=protected-access - directory = mkdtemp() - temp = None - - try: - temp = Deque(iterable=reversed(self), directory=directory) - self.clear() - self.extend(temp) - finally: - if temp is not None: - temp._cache.close() - del temp - rmtree(directory) - + # GrantJ 2019-03-22 Consider using an algorithm that swaps the values + # at two keys. Like self._cache.swap(key1, key2, retry=True) The swap + # method would exchange the values at two given keys. Then, using a + # forward iterator and a reverse iterator, the reverse method could + # avoid making copies of the values. 
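Before the copy-based implementation below, a quick usage sketch of the operations in this hunk (the element values are illustrative):

    from diskcache import Deque

    deque = Deque('abcde')
    deque.peekleft()   # 'a' (faster than deque[0])
    deque.peek()       # 'e' (faster than deque[-1])
    deque.popleft()    # 'a'
    deque.pop()        # 'e'
    deque.remove('c')  # deletes the first occurrence of 'c'
    deque.reverse()
    list(deque)        # ['d', 'b']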
+ temp = Deque(iterable=reversed(self)) + self._clear() + self._extend(temp) + directory = temp.directory + temp._cache.close() + del temp + rmtree(directory) def rotate(self, steps=1): """Rotate deque right by `steps`. If steps is negative then rotate left. - >>> deque = Deque(directory='/tmp/diskcache/deque') - >>> deque.clear() + >>> deque = Deque() >>> deque += range(5) >>> deque.rotate(2) >>> list(deque) @@ -600,29 +626,50 @@ def rotate(self, steps=1): for _ in range(steps): try: - value = self.pop() + value = self._pop() except IndexError: return else: - self.appendleft(value) + self._appendleft(value) else: steps *= -1 steps %= len_self for _ in range(steps): try: - value = self.popleft() + value = self._popleft() except IndexError: return else: - self.append(value) + self._append(value) + + __hash__ = None # type: ignore + @contextmanager + def transact(self): + """Context manager to perform a transaction by locking the deque. - def __del__(self): - self._cache.close() + While the deque is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + Transactions may be nested and may not be shared between threads. - __hash__ = None + >>> from diskcache import Deque + >>> deque = Deque() + >>> deque += range(5) + >>> with deque.transact(): # Atomically rotate elements. + ... value = deque.pop() + ... deque.appendleft(value) + >>> list(deque) + [4, 0, 1, 2, 3] + + :return: context manager for use in `with` statement + + """ + with self._cache.transact(retry=True): + yield class Index(MutableMapping): @@ -634,10 +681,7 @@ class Index(MutableMapping): Hashing protocol is not used. Keys are looked up by their serialized format. See ``diskcache.Disk`` for details. - >>> index = Index('/tmp/diskcache/index') - >>> index - Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.update([('a', 1), ('b', 2), ('c', 3)]) >>> index['a'] 1 @@ -650,6 +694,7 @@ class Index(MutableMapping): ('c', 3) """ + def __init__(self, *args, **kwargs): """Initialize index in directory and update items. @@ -667,24 +712,26 @@ def __init__(self, *args, **kwargs): 4 """ - if args and isinstance(args[0], (BytesType, TextType)): + if args and isinstance(args[0], (bytes, str)): directory = args[0] args = args[1:] else: if args and args[0] is None: args = args[1:] - directory = mkdtemp(prefix='diskcache-') + directory = None self._cache = Cache(directory, eviction_policy='none') - self.update(*args, **kwargs) + self._update(*args, **kwargs) + _update = MutableMapping.update @classmethod def fromcache(cls, cache, *args, **kwargs): """Initialize index using `cache` and update items. - >>> cache = Cache('/tmp/diskcache/index') - >>> _ = cache.clear() + >>> cache = Cache() >>> index = Index.fromcache(cache, {'a': 1, 'b': 2, 'c': 3}) + >>> index.cache is cache + True >>> len(index) 3 >>> 'b' in index @@ -701,23 +748,25 @@ def fromcache(cls, cache, *args, **kwargs): # pylint: disable=no-member,protected-access self = cls.__new__(cls) self._cache = cache - self.update(*args, **kwargs) + self._update(*args, **kwargs) return self + @property + def cache(self): + """Cache used by index.""" + return self._cache @property def directory(self): - "Directory path where items are stored." 
+ """Directory path where items are stored.""" return self._cache.directory - def __getitem__(self, key): """index.__getitem__(key) <==> index[key] Return corresponding value for `key` in index. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.update({'a': 1, 'b': 2}) >>> index['a'] 1 @@ -733,22 +782,14 @@ def __getitem__(self, key): :raises KeyError: if key is not found """ - _cache = self._cache - - while True: - try: - return _cache[key] - except Timeout: - continue - + return self._cache[key] def __setitem__(self, key, value): """index.__setitem__(key, value) <==> index[key] = value Set `key` and `value` item in index. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index['a'] = 1 >>> index[0] = None >>> len(index) @@ -758,24 +799,14 @@ def __setitem__(self, key, value): :param value: value for item """ - _cache = self._cache - - while True: - try: - _cache[key] = value - except Timeout: - continue - else: - return - + self._cache[key] = value def __delitem__(self, key): """index.__delitem__(key) <==> del index[key] Delete corresponding item for `key` from index. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.update({'a': 1, 'b': 2}) >>> del index['a'] >>> del index['b'] @@ -790,16 +821,7 @@ def __delitem__(self, key): :raises KeyError: if key is not found """ - _cache = self._cache - - while True: - try: - del _cache[key] - except Timeout: - continue - else: - return - + del self._cache[key] def setdefault(self, key, default=None): """Set and get value for `key` in index using `default`. @@ -807,8 +829,7 @@ def setdefault(self, key, default=None): If `key` is not in index then set corresponding value to `default`. If `key` is in index then ignore `default` and return existing value. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.setdefault('a', 0) 0 >>> index.setdefault('a', 1) @@ -820,19 +841,29 @@ def setdefault(self, key, default=None): """ _cache = self._cache - while True: try: - return self[key] + return _cache[key] except KeyError: - while True: - try: - _cache.add(key, default) - except Timeout: - continue - else: - break + _cache.add(key, default, retry=True) + + def peekitem(self, last=True): + """Peek at key and value item pair in index based on iteration order. + >>> index = Index() + >>> for num, letter in enumerate('xyz'): + ... index[letter] = num + >>> index.peekitem() + ('z', 2) + >>> index.peekitem(last=False) + ('x', 0) + + :param bool last: last item in iteration order (default True) + :return: key and value item pair + :raises KeyError: if cache is empty + + """ + return self._cache.peekitem(last, retry=True) def pop(self, key, default=ENOVAL): """Remove corresponding item for `key` from index and return value. @@ -840,7 +871,7 @@ def pop(self, key, default=ENOVAL): If `key` is missing then return `default`. If `default` is `ENOVAL` then raise KeyError. 
- >>> index = Index('/tmp/diskcache/index', {'a': 1, 'b': 2}) + >>> index = Index({'a': 1, 'b': 2}) >>> index.pop('a') 1 >>> index.pop('b') @@ -858,19 +889,11 @@ def pop(self, key, default=ENOVAL): :raises KeyError: if key is not found and default is ENOVAL """ - _cache = self._cache - - while True: - try: - value = _cache.pop(key, default=default) - except Timeout: - continue - else: - if value is ENOVAL: - raise KeyError(key) - return value - + value = _cache.pop(key, default=default, retry=True) + if value is ENOVAL: + raise KeyError(key) + return value def popitem(self, last=True): """Remove and return item pair. @@ -879,8 +902,7 @@ def popitem(self, last=True): True else first-in-first-out (FIFO) order. LIFO order imitates a stack and FIFO order imitates a queue. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.update([('a', 1), ('b', 2), ('c', 3)]) >>> index.popitem() ('c', 3) @@ -891,32 +913,21 @@ def popitem(self, last=True): >>> index.popitem() Traceback (most recent call last): ... - KeyError + KeyError: 'dictionary is empty' :param bool last: pop last item pair (default True) :return: key and value item pair :raises KeyError: if index is empty """ - # pylint: disable=arguments-differ + # pylint: disable=arguments-differ,unbalanced-tuple-unpacking _cache = self._cache - while True: - try: - if last: - key = next(reversed(_cache)) - else: - key = next(iter(_cache)) - except StopIteration: - raise KeyError - - try: - value = _cache.pop(key) - except (KeyError, Timeout): - continue - else: - return key, value + with _cache.transact(retry=True): + key, value = _cache.peekitem(last=last) + del _cache[key] + return key, value def push(self, value, prefix=None, side='back'): """Push `value` onto `side` of queue in index identified by `prefix`. @@ -929,8 +940,7 @@ def push(self, value, prefix=None, side='back'): See also `Index.pull`. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> print(index.push('apples')) 500000000000000 >>> print(index.push('beans')) @@ -948,14 +958,7 @@ def push(self, value, prefix=None, side='back'): :return: key for item in cache """ - _cache_push = self._cache.push - - while True: - try: - return _cache_push(value, prefix, side) - except Timeout: - continue - + return self._cache.push(value, prefix, side, retry=True) def pull(self, prefix=None, default=(None, None), side='front'): """Pull key and value item pair from `side` of queue in index. @@ -971,8 +974,7 @@ def pull(self, prefix=None, default=(None, None), side='front'): See also `Index.push`. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> for letter in 'abc': ... print(index.push(letter)) 500000000000000 @@ -996,28 +998,20 @@ def pull(self, prefix=None, default=(None, None), side='front'): :return: key and value item pair or default if queue is empty """ - _cache_pull = self._cache.pull - - while True: - try: - return _cache_pull(prefix, default, side) - except Timeout: - continue - + return self._cache.pull(prefix, default, side, retry=True) def clear(self): """Remove all items from index. 
- """ - _cache_clear = self._cache.clear - - while True: - try: - _cache_clear() - return - except Timeout: - continue + >>> index = Index({'a': 0, 'b': 1, 'c': 2}) + >>> len(index) + 3 + >>> index.clear() + >>> dict(index) + {} + """ + self._cache.clear(retry=True) def __iter__(self): """index.__iter__() <==> iter(index) @@ -1027,14 +1021,12 @@ def __iter__(self): """ return iter(self._cache) - def __reversed__(self): """index.__reversed__() <==> reversed(index) Return iterator of index keys in reversed insertion order. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.update([('a', 1), ('b', 2), ('c', 3)]) >>> iterator = reversed(index) >>> next(iterator) @@ -1045,7 +1037,6 @@ def __reversed__(self): """ return reversed(self._cache) - def __len__(self): """index.__len__() <==> len(index) @@ -1054,228 +1045,56 @@ def __len__(self): """ return len(self._cache) + def keys(self): + """Set-like object providing a view of index keys. - if sys.hexversion < 0x03000000: - def keys(self): - """List of index keys. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update([('a', 1), ('b', 2), ('c', 3)]) - >>> index.keys() - ['a', 'b', 'c'] - - :return: list of keys - - """ - return list(self._cache) - - - def values(self): - """List of index values. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update([('a', 1), ('b', 2), ('c', 3)]) - >>> index.values() - [1, 2, 3] - - :return: list of values - - """ - return list(self.itervalues()) - - - def items(self): - """List of index items. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update([('a', 1), ('b', 2), ('c', 3)]) - >>> index.items() - [('a', 1), ('b', 2), ('c', 3)] - - :return: list of items - - """ - return list(self.iteritems()) - - - def iterkeys(self): - """Iterator of index keys. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update([('a', 1), ('b', 2), ('c', 3)]) - >>> list(index.iterkeys()) - ['a', 'b', 'c'] - - :return: iterator of keys - - """ - return iter(self._cache) - - - def itervalues(self): - """Iterator of index values. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update([('a', 1), ('b', 2), ('c', 3)]) - >>> list(index.itervalues()) - [1, 2, 3] - - :return: iterator of values - - """ - _cache = self._cache - - for key in _cache: - while True: - try: - yield _cache[key] - except KeyError: - break - except Timeout: - continue - else: - break - - - def iteritems(self): - """Iterator of index items. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update([('a', 1), ('b', 2), ('c', 3)]) - >>> list(index.iteritems()) - [('a', 1), ('b', 2), ('c', 3)] - - :return: iterator of items - - """ - _cache = self._cache - - for key in _cache: - while True: - try: - yield key, _cache[key] - except KeyError: - break - except Timeout: - continue - else: - break - - - def viewkeys(self): - """Set-like object providing a view of index keys. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update({'a': 1, 'b': 2, 'c': 3}) - >>> keys_view = index.viewkeys() - >>> 'b' in keys_view - True - - :return: keys view - - """ - return KeysView(self) - - - def viewvalues(self): - """Set-like object providing a view of index values. 
- - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update({'a': 1, 'b': 2, 'c': 3}) - >>> values_view = index.viewvalues() - >>> 2 in values_view - True - - :return: values view - - """ - return ValuesView(self) - - - def viewitems(self): - """Set-like object providing a view of index items. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update({'a': 1, 'b': 2, 'c': 3}) - >>> items_view = index.viewitems() - >>> ('b', 2) in items_view - True - - :return: items view - - """ - return ItemsView(self) - - - else: - def keys(self): - """Set-like object providing a view of index keys. - - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update({'a': 1, 'b': 2, 'c': 3}) - >>> keys_view = index.keys() - >>> 'b' in keys_view - True - - :return: keys view - - """ - return KeysView(self) - - - def values(self): - """Set-like object providing a view of index values. + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> keys_view = index.keys() + >>> 'b' in keys_view + True - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update({'a': 1, 'b': 2, 'c': 3}) - >>> values_view = index.values() - >>> 2 in values_view - True + :return: keys view - :return: values view + """ + return KeysView(self) - """ - return ValuesView(self) + def values(self): + """Set-like object providing a view of index values. + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> values_view = index.values() + >>> 2 in values_view + True - def items(self): - """Set-like object providing a view of index items. + :return: values view - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() - >>> index.update({'a': 1, 'b': 2, 'c': 3}) - >>> items_view = index.items() - >>> ('b', 2) in items_view - True + """ + return ValuesView(self) - :return: items view + def items(self): + """Set-like object providing a view of index items. - """ - return ItemsView(self) + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> items_view = index.items() + >>> ('b', 2) in items_view + True + :return: items view - __hash__ = None + """ + return ItemsView(self) + __hash__ = None # type: ignore def __getstate__(self): return self.directory - def __setstate__(self, state): self.__init__(state) - def __eq__(self, other): """index.__eq__(other) <==> index == other @@ -1284,8 +1103,7 @@ def __eq__(self, other): Comparison to another index or ordered dictionary is order-sensitive. Comparison to all other mappings is order-insensitive. - >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> pairs = [('a', 1), ('b', 2), ('c', 3)] >>> index.update(pairs) >>> from collections import OrderedDict @@ -1296,6 +1114,7 @@ def __eq__(self, other): True :param other: other mapping in equality comparison + :return: True if index equals other """ if len(self) != len(other): @@ -1309,7 +1128,6 @@ def __eq__(self, other): else: return all(self[key] == other.get(key, ENOVAL) for key in self) - def __ne__(self, other): """index.__ne__(other) <==> index != other @@ -1318,8 +1136,7 @@ def __ne__(self, other): Comparison to another index or ordered dictionary is order-sensitive. Comparison to all other mappings is order-insensitive. 
- >>> index = Index('/tmp/diskcache/index') - >>> index.clear() + >>> index = Index() >>> index.update([('a', 1), ('b', 2), ('c', 3)]) >>> from collections import OrderedDict >>> od = OrderedDict([('c', 3), ('b', 2), ('a', 1)]) @@ -1329,10 +1146,94 @@ def __ne__(self, other): True :param other: other mapping in inequality comparison + :return: True if index does not equal other """ return not self == other + def memoize(self, name=None, typed=False, ignore=()): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. + + The original underlying function is accessible through the __wrapped__ + attribute. This is useful for introspection, for bypassing the cache, + or for rewrapping the function with a different cache. + + >>> from diskcache import Index + >>> mapping = Index() + >>> @mapping.memoize() + ... def fibonacci(number): + ... if number == 0: + ... return 0 + ... elif number == 1: + ... return 1 + ... else: + ... return fibonacci(number - 1) + fibonacci(number - 2) + >>> print(fibonacci(100)) + 354224848179261915075 + + An additional `__cache_key__` attribute can be used to generate the + cache key used for the given arguments. + + >>> key = fibonacci.__cache_key__(100) + >>> print(mapping[key]) + 354224848179261915075 + + Remember to call memoize when decorating a callable. If you forget, + then a TypeError will occur. Note the lack of parenthenses after + memoize below: + + >>> @mapping.memoize + ... def test(): + ... pass + Traceback (most recent call last): + ... + TypeError: name cannot be callable + + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param set ignore: positional or keyword args to ignore (default ()) + :return: callable decorator + + """ + return self._cache.memoize(name, typed, ignore=ignore) + + @contextmanager + def transact(self): + """Context manager to perform a transaction by locking the index. + + While the index is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + + Transactions may be nested and may not be shared between threads. + + >>> from diskcache import Index + >>> mapping = Index() + >>> with mapping.transact(): # Atomically increment two keys. + ... mapping['total'] = mapping.get('total', 0) + 123.4 + ... mapping['count'] = mapping.get('count', 0) + 1 + >>> with mapping.transact(): # Atomically calculate average. + ... 
average = mapping['total'] / mapping['count'] + >>> average + 123.4 + + :return: context manager for use in `with` statement + + """ + with self._cache.transact(retry=True): + yield def __repr__(self): """index.__repr__() <==> repr(index) @@ -1342,7 +1243,3 @@ def __repr__(self): """ name = type(self).__name__ return '{0}({1!r})'.format(name, self.directory) - - - def __del__(self): - self._cache.close() diff --git a/diskcache/recipes.py b/diskcache/recipes.py new file mode 100644 index 0000000..babb68f --- /dev/null +++ b/diskcache/recipes.py @@ -0,0 +1,488 @@ +"""Disk Cache Recipes +""" + +import functools +import math +import os +import random +import threading +import time + +from .core import ENOVAL, args_to_key, full_name + + +class Averager: + """Recipe for calculating a running average. + + Sometimes known as "online statistics," the running average maintains the + total and count. The average can then be calculated at any time. + + Assumes the key will not be evicted. Set the eviction policy to 'none' on + the cache to guarantee the key is not evicted. + + >>> import diskcache + >>> cache = diskcache.FanoutCache() + >>> ave = Averager(cache, 'latency') + >>> ave.add(0.080) + >>> ave.add(0.120) + >>> ave.get() + 0.1 + >>> ave.add(0.160) + >>> ave.pop() + 0.12 + >>> print(ave.get()) + None + + """ + + def __init__(self, cache, key, expire=None, tag=None): + self._cache = cache + self._key = key + self._expire = expire + self._tag = tag + + def add(self, value): + """Add `value` to average.""" + with self._cache.transact(retry=True): + total, count = self._cache.get(self._key, default=(0.0, 0)) + total += value + count += 1 + self._cache.set( + self._key, + (total, count), + expire=self._expire, + tag=self._tag, + ) + + def get(self): + """Get current average or return `None` if count equals zero.""" + total, count = self._cache.get(self._key, default=(0.0, 0), retry=True) + return None if count == 0 else total / count + + def pop(self): + """Return current average and delete key.""" + total, count = self._cache.pop(self._key, default=(0.0, 0), retry=True) + return None if count == 0 else total / count + + +class Lock: + """Recipe for cross-process and cross-thread lock. + + Assumes the key will not be evicted. Set the eviction policy to 'none' on + the cache to guarantee the key is not evicted. + + >>> import diskcache + >>> cache = diskcache.Cache() + >>> lock = Lock(cache, 'report-123') + >>> lock.acquire() + >>> lock.release() + >>> with lock: + ... pass + + """ + + def __init__(self, cache, key, expire=None, tag=None): + self._cache = cache + self._key = key + self._expire = expire + self._tag = tag + + def acquire(self): + """Acquire lock using spin-lock algorithm.""" + while True: + added = self._cache.add( + self._key, + None, + expire=self._expire, + tag=self._tag, + retry=True, + ) + if added: + break + time.sleep(0.001) + + def release(self): + """Release lock by deleting key.""" + self._cache.delete(self._key, retry=True) + + def locked(self): + """Return true if the lock is acquired.""" + return self._key in self._cache + + def __enter__(self): + self.acquire() + + def __exit__(self, *exc_info): + self.release() + + +class RLock: + """Recipe for cross-process and cross-thread re-entrant lock. + + Assumes the key will not be evicted. Set the eviction policy to 'none' on + the cache to guarantee the key is not evicted. 
+ + >>> import diskcache + >>> cache = diskcache.Cache() + >>> rlock = RLock(cache, 'user-123') + >>> rlock.acquire() + >>> rlock.acquire() + >>> rlock.release() + >>> with rlock: + ... pass + >>> rlock.release() + >>> rlock.release() + Traceback (most recent call last): + ... + AssertionError: cannot release un-acquired lock + + """ + + def __init__(self, cache, key, expire=None, tag=None): + self._cache = cache + self._key = key + self._expire = expire + self._tag = tag + + def acquire(self): + """Acquire lock by incrementing count using spin-lock algorithm.""" + pid = os.getpid() + tid = threading.get_ident() + pid_tid = '{}-{}'.format(pid, tid) + + while True: + with self._cache.transact(retry=True): + value, count = self._cache.get(self._key, default=(None, 0)) + if pid_tid == value or count == 0: + self._cache.set( + self._key, + (pid_tid, count + 1), + expire=self._expire, + tag=self._tag, + ) + return + time.sleep(0.001) + + def release(self): + """Release lock by decrementing count.""" + pid = os.getpid() + tid = threading.get_ident() + pid_tid = '{}-{}'.format(pid, tid) + + with self._cache.transact(retry=True): + value, count = self._cache.get(self._key, default=(None, 0)) + is_owned = pid_tid == value and count > 0 + assert is_owned, 'cannot release un-acquired lock' + self._cache.set( + self._key, + (value, count - 1), + expire=self._expire, + tag=self._tag, + ) + + def __enter__(self): + self.acquire() + + def __exit__(self, *exc_info): + self.release() + + +class BoundedSemaphore: + """Recipe for cross-process and cross-thread bounded semaphore. + + Assumes the key will not be evicted. Set the eviction policy to 'none' on + the cache to guarantee the key is not evicted. + + >>> import diskcache + >>> cache = diskcache.Cache() + >>> semaphore = BoundedSemaphore(cache, 'max-cons', value=2) + >>> semaphore.acquire() + >>> semaphore.acquire() + >>> semaphore.release() + >>> with semaphore: + ... pass + >>> semaphore.release() + >>> semaphore.release() + Traceback (most recent call last): + ... + AssertionError: cannot release un-acquired semaphore + + """ + + def __init__(self, cache, key, value=1, expire=None, tag=None): + self._cache = cache + self._key = key + self._value = value + self._expire = expire + self._tag = tag + + def acquire(self): + """Acquire semaphore by decrementing value using spin-lock algorithm.""" + while True: + with self._cache.transact(retry=True): + value = self._cache.get(self._key, default=self._value) + if value > 0: + self._cache.set( + self._key, + value - 1, + expire=self._expire, + tag=self._tag, + ) + return + time.sleep(0.001) + + def release(self): + """Release semaphore by incrementing value.""" + with self._cache.transact(retry=True): + value = self._cache.get(self._key, default=self._value) + assert self._value > value, 'cannot release un-acquired semaphore' + value += 1 + self._cache.set( + self._key, + value, + expire=self._expire, + tag=self._tag, + ) + + def __enter__(self): + self.acquire() + + def __exit__(self, *exc_info): + self.release() + + +def throttle( + cache, + count, + seconds, + name=None, + expire=None, + tag=None, + time_func=time.time, + sleep_func=time.sleep, +): + """Decorator to throttle calls to function. + + Assumes keys will not be evicted. Set the eviction policy to 'none' on the + cache to guarantee the keys are not evicted. + + >>> import diskcache, time + >>> cache = diskcache.Cache() + >>> count = 0 + >>> @throttle(cache, 2, 1) # 2 calls per 1 second + ... def increment(): + ... global count + ... 
count += 1 + >>> start = time.time() + >>> while (time.time() - start) <= 2: + ... increment() + >>> count in (6, 7) # 6 or 7 calls depending on CPU load + True + + """ + + def decorator(func): + rate = count / float(seconds) + key = full_name(func) if name is None else name + now = time_func() + cache.set(key, (now, count), expire=expire, tag=tag, retry=True) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + while True: + with cache.transact(retry=True): + last, tally = cache.get(key) + now = time_func() + tally += (now - last) * rate + delay = 0 + + if tally > count: + cache.set(key, (now, count - 1), expire) + elif tally >= 1: + cache.set(key, (now, tally - 1), expire) + else: + delay = (1 - tally) / rate + + if delay: + sleep_func(delay) + else: + break + + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def barrier(cache, lock_factory, name=None, expire=None, tag=None): + """Barrier to calling decorated function. + + Supports different kinds of locks: Lock, RLock, BoundedSemaphore. + + Assumes keys will not be evicted. Set the eviction policy to 'none' on the + cache to guarantee the keys are not evicted. + + >>> import diskcache, time + >>> cache = diskcache.Cache() + >>> @barrier(cache, Lock) + ... def work(num): + ... print('worker started') + ... time.sleep(1) + ... print('worker finished') + >>> import multiprocessing.pool + >>> pool = multiprocessing.pool.ThreadPool(2) + >>> _ = pool.map(work, range(2)) + worker started + worker finished + worker started + worker finished + >>> pool.terminate() + + """ + + def decorator(func): + key = full_name(func) if name is None else name + lock = lock_factory(cache, key, expire=expire, tag=tag) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with lock: + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def memoize_stampede( + cache, expire, name=None, typed=False, tag=None, beta=1, ignore=() +): + """Memoizing cache decorator with cache stampede protection. + + Cache stampedes are a type of system overload that can occur when parallel + computing systems using memoization come under heavy load. This behaviour + is sometimes also called dog-piling, cache miss storm, cache choking, or + the thundering herd problem. + + The memoization decorator implements cache stampede protection through + early recomputation. Early recomputation of function results will occur + probabilistically before expiration in a background thread of + execution. Early probabilistic recomputation is based on research by + Vattani, A.; Chierichetti, F.; Lowenstein, K. (2015), Optimal Probabilistic + Cache Stampede Prevention, VLDB, pp. 886-897, ISSN 2150-8097 + + If name is set to None (default), the callable name will be determined + automatically. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as distinct + calls with distinct results. + + The original underlying function is accessible through the `__wrapped__` + attribute. This is useful for introspection, for bypassing the cache, or + for rewrapping the function with a different cache. + + >>> from diskcache import Cache + >>> cache = Cache() + >>> @memoize_stampede(cache, expire=1) + ... def fib(number): + ... if number == 0: + ... return 0 + ... elif number == 1: + ... return 1 + ... else: + ... 
return fib(number - 1) + fib(number - 2) + >>> print(fib(100)) + 354224848179261915075 + + An additional `__cache_key__` attribute can be used to generate the cache + key used for the given arguments. + + >>> key = fib.__cache_key__(100) + >>> del cache[key] + + Remember to call memoize when decorating a callable. If you forget, then a + TypeError will occur. + + :param cache: cache to store callable arguments and return values + :param float expire: seconds until arguments expire + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param str tag: text to associate with arguments (default None) + :param set ignore: positional or keyword args to ignore (default ()) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in Cache.memoize + def decorator(func): + """Decorator created by memoize call for callable.""" + base = (full_name(func),) if name is None else (name,) + + def timer(*args, **kwargs): + """Time execution of `func` and return result and time delta.""" + start = time.time() + result = func(*args, **kwargs) + delta = time.time() - start + return result, delta + + @functools.wraps(func) + def wrapper(*args, **kwargs): + """Wrapper for callable to cache arguments and return values.""" + key = wrapper.__cache_key__(*args, **kwargs) + pair, expire_time = cache.get( + key, + default=ENOVAL, + expire_time=True, + retry=True, + ) + + if pair is not ENOVAL: + result, delta = pair + now = time.time() + ttl = expire_time - now + + if (-delta * beta * math.log(random.random())) < ttl: + return result # Cache hit. + + # Check whether a thread has started for early recomputation. + + thread_key = key + (ENOVAL,) + thread_added = cache.add( + thread_key, + None, + expire=delta, + retry=True, + ) + + if thread_added: + # Start thread for early recomputation. + def recompute(): + with cache: + pair = timer(*args, **kwargs) + cache.set( + key, + pair, + expire=expire, + tag=tag, + retry=True, + ) + + thread = threading.Thread(target=recompute) + thread.daemon = True + thread.start() + + return result + + pair = timer(*args, **kwargs) + cache.set(key, pair, expire=expire, tag=tag, retry=True) + return pair[0] + + def __cache_key__(*args, **kwargs): + """Make key for cache given function arguments.""" + return args_to_key(base, args, kwargs, typed, ignore) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator diff --git a/diskcache/stampede.py b/diskcache/stampede.py deleted file mode 100644 index b103338..0000000 --- a/diskcache/stampede.py +++ /dev/null @@ -1,78 +0,0 @@ -"Stampede barrier implementation." - -import functools as ft -import math -import random -import tempfile -import time - -from .core import Cache, ENOVAL - - -class StampedeBarrier(object): - """Stampede barrier mitigates cache stampedes. - - Cache stampedes are also known as dog-piling, cache miss storm, cache - choking, or the thundering herd problem. - - Based on research by Vattani, A.; Chierichetti, F.; Lowenstein, K. (2015), - Optimal Probabilistic Cache Stampede Prevention, - VLDB, pp. 
886?897, ISSN 2150-8097 - - Example: - - ```python - stampede_barrier = StampedeBarrier('/tmp/user_data', expire=3) - - @stampede_barrier - def load_user_info(user_id): - return database.lookup_user_info_by_id(user_id) - ``` - - """ - # pylint: disable=too-few-public-methods - def __init__(self, cache=None, expire=None): - if isinstance(cache, Cache): - pass - elif cache is None: - cache = Cache(tempfile.mkdtemp()) - else: - cache = Cache(cache) - - self._cache = cache - self._expire = expire - - def __call__(self, func): - cache = self._cache - expire = self._expire - - @ft.wraps(func) - def wrapper(*args, **kwargs): - "Wrapper function to cache function result." - key = (args, kwargs) - - try: - result, expire_time, delta = cache.get( - key, default=ENOVAL, expire_time=True, tag=True - ) - - if result is ENOVAL: - raise KeyError - - now = time.time() - ttl = expire_time - now - - if (-delta * math.log(random.random())) < ttl: - return result - - except KeyError: - pass - - now = time.time() - result = func(*args, **kwargs) - delta = time.time() - now - cache.set(key, result, expire=expire, tag=delta) - - return result - - return wrapper diff --git a/docs/Makefile b/docs/Makefile index e3bd50b..d4bb2cb 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,216 +1,20 @@ -# Makefile for Sphinx documentation +# Minimal makefile for Sphinx documentation # -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . BUILDDIR = _build -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: help +# Put it first so that "make" without argument is like "make help". 
help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - -.PHONY: clean -clean: - rm -rf $(BUILDDIR)/* - -.PHONY: html -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: dirhtml -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -.PHONY: singlehtml -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -.PHONY: pickle -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -.PHONY: json -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -.PHONY: htmlhelp -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -.PHONY: qthelp -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/DiskCache.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/DiskCache.qhc" - -.PHONY: applehelp -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -.PHONY: devhelp -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." 
- @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/DiskCache" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/DiskCache" - @echo "# devhelp" - -.PHONY: epub -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -.PHONY: latex -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -.PHONY: latexpdf -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: latexpdfja -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: text -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -.PHONY: man -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -.PHONY: texinfo -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -.PHONY: info -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -.PHONY: gettext -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -.PHONY: changes -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -.PHONY: linkcheck -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: doctest -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -.PHONY: coverage -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -.PHONY: xml -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." +.PHONY: help Makefile -.PHONY: pseudoxml -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/core-p1-delete.png b/docs/_static/core-p1-delete.png index 6cf196f..25907e5 100644 Binary files a/docs/_static/core-p1-delete.png and b/docs/_static/core-p1-delete.png differ diff --git a/docs/_static/core-p1-get.png b/docs/_static/core-p1-get.png index 434d3e5..66ff449 100644 Binary files a/docs/_static/core-p1-get.png and b/docs/_static/core-p1-get.png differ diff --git a/docs/_static/core-p1-set.png b/docs/_static/core-p1-set.png index a59b739..bf43574 100644 Binary files a/docs/_static/core-p1-set.png and b/docs/_static/core-p1-set.png differ diff --git a/docs/_static/core-p8-delete.png b/docs/_static/core-p8-delete.png index f717e0e..c6cb299 100644 Binary files a/docs/_static/core-p8-delete.png and b/docs/_static/core-p8-delete.png differ diff --git a/docs/_static/core-p8-get.png b/docs/_static/core-p8-get.png index 0c4451b..889c032 100644 Binary files a/docs/_static/core-p8-get.png and b/docs/_static/core-p8-get.png differ diff --git a/docs/_static/core-p8-set.png b/docs/_static/core-p8-set.png index 5be3b6c..f653fe1 100644 Binary files a/docs/_static/core-p8-set.png and b/docs/_static/core-p8-set.png differ diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 50b1356..1a8e2a0 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -3,6 +3,15 @@ table { width: 100%; } +#comparison table { + display: block; + overflow: scroll; +} + th.head { text-align: center; } + +div.body { + min-width: 240px; +} diff --git a/docs/_static/djangocache-delete.png b/docs/_static/djangocache-delete.png index 59d6057..7d4225c 100644 Binary files a/docs/_static/djangocache-delete.png and b/docs/_static/djangocache-delete.png differ diff --git a/docs/_static/djangocache-get.png b/docs/_static/djangocache-get.png index e92d4a7..4805d25 100644 Binary files a/docs/_static/djangocache-get.png and b/docs/_static/djangocache-get.png differ diff --git a/docs/_static/djangocache-set.png b/docs/_static/djangocache-set.png index bd51a62..a2a241a 100644 Binary files a/docs/_static/djangocache-set.png and b/docs/_static/djangocache-set.png differ diff --git a/docs/_static/early-recomputation-03.png b/docs/_static/early-recomputation-03.png new file mode 100644 index 0000000..2c75aff Binary files /dev/null and b/docs/_static/early-recomputation-03.png differ diff --git a/docs/_static/early-recomputation-05.png b/docs/_static/early-recomputation-05.png new file mode 100644 index 0000000..17d8222 Binary files /dev/null and b/docs/_static/early-recomputation-05.png differ diff --git a/docs/_static/early-recomputation.png b/docs/_static/early-recomputation.png new file mode 100644 index 0000000..6e69b28 Binary files /dev/null and b/docs/_static/early-recomputation.png differ diff --git a/docs/_static/no-caching.png b/docs/_static/no-caching.png new file mode 100644 index 0000000..d6c7e12 Binary files /dev/null and b/docs/_static/no-caching.png differ diff --git a/docs/_static/synchronized-locking.png b/docs/_static/synchronized-locking.png new file mode 100644 index 0000000..030c2ca Binary files /dev/null and b/docs/_static/synchronized-locking.png differ diff --git a/docs/_static/traditional-caching.png b/docs/_static/traditional-caching.png new file mode 100644 index 0000000..755d2e9 Binary files /dev/null and b/docs/_static/traditional-caching.png differ diff --git a/docs/api.rst b/docs/api.rst index 90f15f9..e38f254 100644 --- 
a/docs/api.rst +++ b/docs/api.rst @@ -1,19 +1,17 @@ -DiskCache API Reference -======================= - -The :doc:`tutorial` provides a helpful walkthrough of most methods. +.. automodule:: diskcache .. contents:: :local: -DjangoCache ------------ +Cache +----- -Read the :ref:`DjangoCache tutorial ` for example usage. +Read the :ref:`Cache tutorial ` for example usage. -.. autoclass:: diskcache.DjangoCache +.. autoclass:: diskcache.Cache :members: :special-members: + :exclude-members: __weakref__ FanoutCache ----------- @@ -25,16 +23,54 @@ Read the :ref:`FanoutCache tutorial ` for example usage. :special-members: :exclude-members: __weakref__ -Cache +DjangoCache +----------- + +Read the :ref:`DjangoCache tutorial ` for example usage. + +.. autoclass:: diskcache.DjangoCache + :members: + :special-members: + +Deque ----- -Read the :ref:`Cache tutorial ` for example usage. +.. autoclass:: diskcache.Deque + :members: + :special-members: + :exclude-members: __weakref__ -.. autoclass:: diskcache.Cache +Index +----- + +.. autoclass:: diskcache.Index :members: :special-members: :exclude-members: __weakref__ +Recipes +------- + +.. autoclass:: diskcache.Averager + :members: + +.. autoclass:: diskcache.Lock + :members: + +.. autoclass:: diskcache.RLock + :members: + +.. autoclass:: diskcache.BoundedSemaphore + :members: + +.. autodecorator:: diskcache.throttle + +.. autodecorator:: diskcache.barrier + +.. autodecorator:: diskcache.memoize_stampede + +.. _constants: + Constants --------- @@ -58,7 +94,7 @@ Read the :ref:`Settings tutorial ` for details. pragma. * `sqlite_synchronous` (int) default 1, "NORMAL" - SQLite synchronous pragma. - * `disk_min_file_size` (int, in bytes) default one kilobyte - values with + * `disk_min_file_size` (int, in bytes) default 32 kilobytes - values with greater size are stored in files. * `disk_pickle_protocol` (int) default highest Pickle protocol - the Pickle protocol to use for data types that are not natively supported. @@ -80,23 +116,17 @@ Read the :ref:`Disk tutorial ` for details. :special-members: :exclude-members: __weakref__ -Timeout -------- +JSONDisk +-------- -.. autoexception:: diskcache.Timeout - -Deque ------ +Read the :ref:`Disk tutorial ` for details. -.. autoclass:: diskcache.Deque +.. autoclass:: diskcache.JSONDisk :members: :special-members: :exclude-members: __weakref__ -Index ------ +Timeout +------- -.. autoclass:: diskcache.Index - :members: - :special-members: - :exclude-members: __weakref__ +.. autoexception:: diskcache.Timeout diff --git a/docs/cache-benchmarks.rst b/docs/cache-benchmarks.rst index 3f0343d..5b125e8 100644 --- a/docs/cache-benchmarks.rst +++ b/docs/cache-benchmarks.rst @@ -116,7 +116,7 @@ Timings for pylibmc.Client Total 98999 2.669s ========= ========= ========= ========= ========= ========= ========= ========= -Memcached performance is low latency and very stable. +Memcached performance is low latency and stable. ========= ========= ========= ========= ========= ========= ========= ========= Timings for redis.StrictRedis @@ -144,8 +144,8 @@ Get .. image:: _static/core-p8-get.png -Under heavy load, :doc:`DiskCache ` gets are very low latency. At the -90th percentile, they are less than half the latency of Memcached. +Under heavy load, :doc:`DiskCache ` gets are low latency. At the 90th +percentile, they are less than half the latency of Memcached. Set ... 
diff --git a/docs/case-study-delay-fuzzer.rst b/docs/case-study-delay-fuzzer.rst deleted file mode 100644 index e71bdae..0000000 --- a/docs/case-study-delay-fuzzer.rst +++ /dev/null @@ -1,138 +0,0 @@ -Case Study: Delay Fuzzer -======================== - -Raymond keynote: -https://dl.dropboxusercontent.com/u/3967849/pybay2017_keynote/_build/html/index.html - -Fuzzing technique: -https://dl.dropboxusercontent.com/u/3967849/pybay2017_keynote/_build/html/threading.html#fuzzing - -Code below is simple on purpose. Not something to use in production. Ok for -testing. - -// discuss sys.settrace - - >>> def delayfuzzer(function): - ... """Insert random delays into function. - ... - ... WARNING: Not to be used in production scenarios. - ... The use of `sys.settrace` may affect other Python - ... tools like `pdb` and `coverage`. - ... - ... Decorator to insert random delays into a function to - ... encourage race conditions in multi-threaded code. - ... - ... """ - ... from functools import wraps - ... from sys import settrace - ... - ... try: - ... code = function.__code__ - ... except AttributeError: # Python 2 compatibility. - ... code = function.co_code - ... - ... def tracer(frame, event, arg): - ... "Activate sleeper in calls to function." - ... if event == 'call' and frame.f_code is code: - ... return sleeper - ... - ... @wraps(function) - ... def wrapper(*args, **kwargs): - ... """Set tracer before calling function. - ... - ... Tracing is thread-local so set the tracer before - ... every function call. - ... - ... """ - ... settrace(tracer) - ... return function(*args, **kwargs) - ... - ... return wrapper - -Sleeper function that prints location: - - >>> from time import sleep - >>> from random import expovariate - >>> def sleeper(frame, event, arg): - ... "Sleep for random period." - ... lineno = frame.f_lineno - ... print('Tracing line %s in diskcache/core.py' % lineno) - ... sleep(expovariate(100)) - -Check that it's working: - - >>> import diskcache - >>> diskcache.Cache.incr = delayfuzzer(diskcache.Cache.incr) - >>> cache = diskcache.FanoutCache('tmp') - >>> cache.incr(0) - Tracing line 797 in diskcache/core.py - Tracing line 798 in diskcache/core.py - Tracing line 800 in diskcache/core.py - Tracing line 804 in diskcache/core.py - Tracing line 805 in diskcache/core.py - Tracing line 807 in diskcache/core.py - Tracing line 808 in diskcache/core.py - Tracing line 811 in diskcache/core.py - Tracing line 812 in diskcache/core.py - Tracing line 813 in diskcache/core.py - Tracing line 814 in diskcache/core.py - Tracing line 815 in diskcache/core.py - Tracing line 815 in diskcache/core.py - 1 - >>> cache.clear() - 1 - -Simple sleeper function: - - >>> def sleeper(frame, event, arg): - ... "Sleep for random period." - ... sleep(expovariate(100)) - -Increment all numbers in a range: - - >>> def task(cache): - ... for num in range(100): - ... cache.incr(num, retry=True) - -Process worker to start many tasks in separate threads. - - >>> import threading - >>> def worker(): - ... cache = diskcache.FanoutCache('tmp') - ... threads = [] - ... - ... for num in range(8): - ... thread = threading.Thread(target=task, args=(cache,)) - ... threads.append(thread) - ... - ... for thread in threads: - ... thread.start() - ... - ... for thread in threads: - ... thread.join() - -Start many worker processes: - - >>> import multiprocessing - >>> def main(): - ... processes = [] - ... - ... for _ in range(8): - ... process = multiprocessing.Process(target=worker) - ... processes.append(process) - ... - ... 
for process in processes: - ... process.start() - ... - ... for process in processes: - ... process.join() - -Ok, here goes: - - >>> main() - >>> sorted(cache) == list(range(100)) - True - >>> all(cache[key] == 64 for key in cache) - True - -Yaay! It worked. diff --git a/docs/case-study-landing-page-caching.rst b/docs/case-study-landing-page-caching.rst new file mode 100644 index 0000000..677186e --- /dev/null +++ b/docs/case-study-landing-page-caching.rst @@ -0,0 +1,142 @@ +Case Study: Landing Page Caching +================================ + +:doc:`DiskCache ` version 4 added recipes for cache stampede mitigation. +Cache stampedes are a type of system overload that can occur when parallel +computing systems using memoization come under heavy load. This behaviour is +sometimes also called dog-piling, cache miss storm, cache choking, or the +thundering herd problem. Let's look at how that applies to landing page +caching. + +.. code-block:: python + + import time + + def generate_landing_page(): + time.sleep(0.2) # Work really hard. + # Return HTML response. + +Imagine a website under heavy load with a function used to generate the landing +page. There are five processes each with two threads for a total of ten +concurrent workers. The landing page is loaded constantly and takes about two +hundred milliseconds to generate. + +.. image:: _static/no-caching.png + +When we look at the number of concurrent workers and the latency with no +caching at all, the graph looks as above. Notice each worker constantly +regenerates the page with a consistently slow latency. + +.. code-block:: python + :emphasize-lines: 5 + + import diskcache as dc + + cache = dc.Cache() + + @cache.memoize(expire=1) + def generate_landing_page(): + time.sleep(0.2) + +Assume the result of generating the landing page can be memoized for one +second. Memoization supports a traditional caching strategy. After each second, +the cached HTML expires and all ten workers rush to regenerate the result. + +.. image:: _static/traditional-caching.png + +There is a huge improvement in average latency now but some requests experience +worse latency than before due to the added overhead of caching. The cache +stampede is visible too as the spikes in the concurrency graph. If generating +the landing page requires significant resources then the spikes may be +prohibitive. + +To reduce the number of concurrent workers, a barrier can be used to +synchronize generating the landing page. + +.. code-block:: python + :emphasize-lines: 1,2,3 + + @cache.memoize(expire=0) + @dc.barrier(cache, dc.Lock) + @cache.memoize(expire=1) + def generate_landing_page(): + time.sleep(0.2) + +The double-checked locking uses two memoization decorators to optimistically +look up the cached result before locking. With `expire` set to zero, the +cache's get-operation is performed but the set-operation is skipped. Only the +inner-nested memoize decorator will update the cache. + +.. image:: _static/synchronized-locking.png + +The number of concurrent workers is now greatly improved. Rather than having +ten workers all attempt to generate the same result, a single worker generates +the result and the other ten benefit. The maximum latency has increased however +as three layers of caching and locking wrap the function. + +Ideally, the system would anticipate the pending expiration of the cached item +and would recompute the result in a separate thread of execution. 
Coordinating +recomputation would be a function of the number of workers, the expiration +time, and the duration of computation. Fortunately, Vattani, et al. published +the solution in "Optimal Probabilistic Cache Stampede Prevention" in 2015. + +.. code-block:: python + :emphasize-lines: 1 + + @dc.memoize_stampede(cache, expire=1) + def generate_landing_page(): + time.sleep(0.2) + +Early probabilistic recomputation uses a random number generator to simulate a +cache miss prior to expiration. The new result is then computed in a separate +thread while the cached result is returned to the caller. When the cache item +is missing, the result is computed and cached synchronously. + +.. image:: _static/early-recomputation.png + +The latency is now its theoretical best. An initial warmup execution takes two +hundred milliseconds and the remaining calls all return immediately from the +cache. Behind the scenes, separate threads of execution are recomputing the +result of workers and updating the cache. The concurrency graph shows a nearly +constant stream of workers recomputing the function's result. + +.. code-block:: python + :emphasize-lines: 1 + + @dc.memoize_stampede(cache, expire=1, beta=0.5) + def generate_landing_page(): + time.sleep(0.2) + +Vattani described an additional parameter, :math:`\beta`, which could be used +to tune the eagerness of recomputation. As the number and frequency of +concurrent worker calls increases, eagerness can be lessened by decreasing the +:math:`\beta` parameter. The default value of :math:`\beta` is one, and above +it is set to half. + +.. image:: _static/early-recomputation-05.png + +Latency is now still its theoretical best while the worker load has decreased +significantly. The likelihood of simulated cache misses is now half what it was +before. The value was determined through experimentation. + +.. code-block:: python + :emphasize-lines: 1 + + @dc.memoize_stampede(cache, expire=1, beta=0.3) + def generate_landing_page(): + time.sleep(0.2) + +Lets see what happens when :math:`\beta` is set too low. + +.. image:: _static/early-recomputation-03.png + +When set too low, the cache item expires before a new value is recomputed. The +real cache miss then causes the workers to synchronously recompute the landing +page and cache the result. With no barrier in place, eleven workers cause a +cache stampede. The eleven workers are composed of ten synchronous workers and +one in a background thread. The best way to customize :math:`\beta` is through +experimentation, otherwise the default is reasonable. + +:doc:`DiskCache ` provides data types and recipes for memoization and +mitigation of cache stampedes. The decorators provided are composable for a +variety of scenarios. The best way to get started is with the :doc:`tutorial`. diff --git a/docs/case-study-web-crawler.rst b/docs/case-study-web-crawler.rst index 982a3c8..c37e2a5 100644 --- a/docs/case-study-web-crawler.rst +++ b/docs/case-study-web-crawler.rst @@ -116,7 +116,7 @@ the crawl function and query it. >>> len(results) 99 -As an added benefit, our code also now works in parallel. For free! +As an added benefit, our code also now works in parallel. >>> results.clear() >>> from multiprocessing import Process diff --git a/docs/conf.py b/docs/conf.py index 5f04890..92bf3ec 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,31 +1,33 @@ -# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. 
# -# DiskCache documentation build configuration file, created by -# sphinx-quickstart on Wed Feb 10 20:20:15 2016. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html -import sys -import os +# -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. + +import os +import sys sys.path.insert(0, os.path.abspath('..')) + import diskcache -from diskcache import __version__ -# -- General configuration ------------------------------------------------ -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# -- Project information ----------------------------------------------------- + +project = 'DiskCache' +copyright = '2023, Grant Jenks' +author = 'Grant Jenks' + +# The full version, including alpha/beta/rc tags +release = diskcache.__version__ + + +# -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -38,77 +40,13 @@ # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'DiskCache' -copyright = u'2016, Grant Jenks' -author = u'Grant Jenks' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = __version__ -# The full version, including alpha/beta/rc tags. -release = version - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). 
-#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -# -- Options for HTML output ---------------------------------------------- +# -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. @@ -121,7 +59,6 @@ 'logo': 'gj-logo.png', 'logo_name': True, 'logo_text_align': 'center', - 'travis_button': True, 'analytics_id': 'UA-19364636-2', 'show_powered_by': False, 'show_related': True, @@ -130,43 +67,11 @@ 'github_type': 'star', } -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -#html_extra_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - # Custom sidebar templates, maps document names to template names. html_sidebars = { '**': [ @@ -178,134 +83,5 @@ ] } -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. 
The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -#html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'DiskCacheDoc' - def setup(app): - app.add_stylesheet('custom.css') - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'DiskCache.tex', u'DiskCache Documentation', - u'Grant Jenks', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'diskcache', u'DiskCache Documentation', - [author], 1) -] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'DiskCache', u'DiskCache Documentation', - author, 'DiskCache', 'Disk and file backed cache.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False + app.add_css_file('custom.css') diff --git a/docs/development.rst b/docs/development.rst index a37c3a7..828c3be 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -29,7 +29,7 @@ Requests for Contributions #. Backend Compatibility - #. 
`Flask-Cache `_ + #. `Flask-Caching `_ #. `Beaker `_ #. `dogpile.cache `_ @@ -65,13 +65,12 @@ counterparts are necessary for some benchmarks. Testing ------- -:doc:`DiskCache ` currently tests against four versions of Python: +:doc:`DiskCache ` currently tests against five versions of Python: -* CPython 2.7 -* CPython 3.4 * CPython 3.5 * CPython 3.6 -* PyPy2 +* CPython 3.7 +* CPython 3.8 Testing uses `tox `_. If you don't want to install all the development requirements, then, after downloading, you can diff --git a/docs/djangocache-benchmarks.rst b/docs/djangocache-benchmarks.rst index 4f791ff..88d1fec 100644 --- a/docs/djangocache-benchmarks.rst +++ b/docs/djangocache-benchmarks.rst @@ -102,8 +102,8 @@ Get .. image:: _static/djangocache-get.png -Under heavy load, :class:`DjangoCache ` gets are very -low latency. At the 99th percentile they are on par with the Memcached cache +Under heavy load, :class:`DjangoCache ` gets are low +latency. At the 99th percentile they are on par with the Memcached cache backend. Set @@ -157,7 +157,7 @@ Timings for memcached Total 791992 68.825s ========= ========= ========= ========= ========= ========= ========= ========= -Memcached performance is low latency and very stable. +Memcached performance is low latency and stable. ========= ========= ========= ========= ========= ========= ========= ========= Timings for redis diff --git a/docs/index.rst b/docs/index.rst index ed0357a..cea1122 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,145 +1,13 @@ -DiskCache: Disk Backed Cache -============================ - -`DiskCache`_ is an Apache2 licensed disk and file backed cache library, written -in pure-Python, and compatible with Django. - -The cloud-based computing of 2018 puts a premium on memory. Gigabytes of empty -space is left on disks as processes vie for memory. Among these processes is -Memcached (and sometimes Redis) which is used as a cache. Wouldn't it be nice -to leverage empty disk space for caching? - -Django is Python's most popular web framework and ships with several caching -backends. Unfortunately the file-based cache in Django is essentially -broken. The culling method is random and large caches repeatedly scan a cache -directory which slows linearly with growth. Can you really allow it to take -sixty milliseconds to store a key in a cache with a thousand items? - -In Python, we can do better. And we can do it in pure-Python! - -:: - - In [1]: import pylibmc - In [2]: client = pylibmc.Client(['127.0.0.1'], binary=True) - In [3]: client[b'key'] = b'value' - In [4]: %timeit client[b'key'] - - 10000 loops, best of 3: 25.4 µs per loop - - In [5]: import diskcache as dc - In [6]: cache = dc.Cache('tmp') - In [7]: cache[b'key'] = b'value' - In [8]: %timeit cache[b'key'] - - 100000 loops, best of 3: 11.8 µs per loop - -**Note:** Micro-benchmarks have their place but are not a substitute for real -measurements. DiskCache offers cache benchmarks to defend its performance -claims. Micro-optimizations are avoided but your mileage may vary. - -DiskCache efficiently makes gigabytes of storage space available for -caching. By leveraging rock-solid database libraries and memory-mapped files, -cache performance can match and exceed industry-standard solutions. There's no -need for a C compiler or running another process. Performance is a feature and -testing has 100% coverage with unit tests and hours of stress. - -Testimonials ------------- - -Does your company or website use `DiskCache`_? Send us a `message -`_ and let us know. 
- -Features --------- - -- Pure-Python -- Fully Documented -- Benchmark comparisons (alternatives, Django cache backends) -- 100% test coverage -- Hours of stress testing -- Performance matters -- Django compatible API -- Thread-safe and process-safe -- Supports multiple eviction policies (LRU and LFU included) -- Keys support "tag" metadata and eviction -- Developed on Python 2.7 -- Tested on CPython 2.7, 3.4, 3.5, 3.6 and PyPy -- Tested on Linux, Mac OS X, and Windows -- Tested using Travis CI and AppVeyor CI - -.. image:: https://api.travis-ci.org/grantjenks/python-diskcache.svg?branch=master - :target: http://www.grantjenks.com/docs/diskcache/ - -.. image:: https://ci.appveyor.com/api/projects/status/github/grantjenks/python-diskcache?branch=master&svg=true - :target: http://www.grantjenks.com/docs/diskcache/ - -Quickstart ----------- - -Installing DiskCache is simple with -`pip `_:: - - $ pip install diskcache - -You can access documentation in the interpreter with Python's built-in help -function:: - - >>> from diskcache import Cache, FanoutCache, DjangoCache - >>> help(Cache) - >>> help(FanoutCache) - >>> help(DjangoCache) - -User Guide ----------- - -For those wanting more details, this part of the documentation describes -introduction, benchmarks, development, and API. +.. include:: ../README.rst .. toctree:: - :maxdepth: 1 + :hidden: tutorial cache-benchmarks djangocache-benchmarks case-study-web-crawler + case-study-landing-page-caching sf-python-2017-meetup-talk api development - -Reference and Indices ---------------------- - -* `DiskCache Documentation`_ -* `DiskCache at PyPI`_ -* `DiskCache at GitHub`_ -* `DiskCache Issue Tracker`_ -* :ref:`search` -* :ref:`genindex` - -.. _`DiskCache Documentation`: http://www.grantjenks.com/docs/diskcache/ -.. _`DiskCache at PyPI`: https://pypi.python.org/pypi/diskcache/ -.. _`DiskCache at GitHub`: https://github.com/grantjenks/python-diskcache/ -.. _`DiskCache Issue Tracker`: https://github.com/grantjenks/python-diskcache/issues/ - -Apache2 License ---------------- - -A large number of open source projects you find today are `GPL Licensed`_. -A project that is released as GPL cannot be used in any commercial product -without the product itself also being offered as open source. - -The MIT, BSD, ISC, and Apache2 licenses are great alternatives to the GPL -that allow your open-source software to be used freely in proprietary, -closed-source software. - -DiskCache is released under terms of the `Apache2 License`_. - -.. _`GPL Licensed`: http://www.opensource.org/licenses/gpl-license.php -.. _`Apache2 License`: http://opensource.org/licenses/Apache-2.0 - -DiskCache License ------------------ - -.. include:: ../LICENSE - -.. _`DiskCache`: http://www.grantjenks.com/docs/diskcache/ diff --git a/docs/make.bat b/docs/make.bat index e1a063b..2119f51 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,62 +1,18 @@ @ECHO OFF +pushd %~dp0 + REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) +set SOURCEDIR=. set BUILDDIR=_build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . -set I18NSPHINXOPTS=%SPHINXOPTS% . -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) if "%1" == "" goto help -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. 
singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - echo. coverage to run coverage check of the documentation if enabled - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -REM Check if sphinx-build is available and fallback to Python version if any -%SPHINXBUILD% 1>NUL 2>NUL -if errorlevel 9009 goto sphinx_python -goto sphinx_ok - -:sphinx_python - -set SPHINXBUILD=python -m sphinx.__init__ -%SPHINXBUILD% 2> nul +%SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx @@ -69,195 +25,11 @@ if errorlevel 9009 ( exit /b 1 ) -:sphinx_ok - - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\DiskCache.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\DiskCache.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. 
The epub file is in %BUILDDIR%/epub. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. - goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -if "%1" == "coverage" ( - %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage - if errorlevel 1 exit /b 1 - echo. - echo.Testing of coverage in the sources finished, look at the ^ -results in %BUILDDIR%/coverage/python.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. - goto end -) +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end +popd diff --git a/docs/sf-python-2017-meetup-talk.rst b/docs/sf-python-2017-meetup-talk.rst index 214b66b..5f79000 100644 --- a/docs/sf-python-2017-meetup-talk.rst +++ b/docs/sf-python-2017-meetup-talk.rst @@ -20,7 +20,7 @@ Landscape Backends -------- -* Backends have very different designs and tradeoffs. +* Backends have different designs and tradeoffs. Frameworks @@ -165,7 +165,7 @@ SQLite * Use a context manager for isolation level management. * Pragmas tune the behavior and performance of SQLite. - * Default is very robust and slow. + * Default is robust and slow. * Use write-ahead-log so writers don't block readers. 
* Memory-map pages for fast lookups. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index f4ff046..2eb454d 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -18,14 +18,14 @@ Pip & PyPI Installing :doc:`DiskCache ` is simple with `pip `_:: - $ pip install diskcache + $ pip install --upgrade diskcache -or, with `easy_install `_:: - - $ easy_install diskcache - -But `prefer pip `_ if at all -possible. +The versioning scheme uses `major.minor.micro` with `micro` intended for bug +fixes, `minor` intended for small features or improvements, and `major` +intended for significant new features and breaking changes. While it is +intended that only `major` version changes are backwards incompatible, it is +not always guaranteed. When running in production, it is recommended to pin at +least the `major` version. Get the Code ............ @@ -51,12 +51,12 @@ or install it into your site-packages easily:: $ python setup.py install :doc:`DiskCache ` is looking for a Debian package maintainer. If you can -help, please open an issue in the `DiskCache Issue Tracker -`_. +help, please open an issue in the `DiskCache Issue Tracker`_. -:doc:`DiskCache ` is looking for a CentOS/RPM package maintainer. If -you can help, please open an issue in the `DiskCache Issue Tracker -`_. +:doc:`DiskCache ` is looking for a CentOS/RPM package maintainer. If you +can help, please open an issue in the `DiskCache Issue Tracker`_. + +.. _`DiskCache Issue Tracker`: https://github.com/grantjenks/python-diskcache/issues/ .. _tutorial-cache: @@ -64,44 +64,53 @@ Cache ----- The core of :doc:`DiskCache ` is :class:`diskcache.Cache` which -represents a disk and file backed cache. As a Cache it supports a familiar -Python Mapping interface with additional cache and performance parameters. +represents a disk and file backed cache. As a Cache, it supports a familiar +Python mapping interface with additional cache and performance parameters. >>> from diskcache import Cache - >>> cache = Cache('/tmp/mycachedir') + >>> cache = Cache() + +Initialization expects a directory path reference. If the directory path does +not exist, it will be created. When not specified, a temporary directory is +automatically created. Additional keyword parameters are discussed below. Cache +objects are thread-safe and may be shared between threads. Two Cache objects +may also reference the same directory from separate threads or processes. In +this way, they are also process-safe and support cross-process communication. + +Cache objects open and maintain one or more file handles. But unlike files, all +Cache operations are atomic and Cache objects support process-forking and may +be serialized using Pickle. Each thread that accesses a cache should also call +:meth:`close <.Cache.close>` on the cache. Cache objects can be used +in a `with` statement to safeguard calling :meth:`close +`. -Initialization requires a directory path reference. If the directory path does -not exist, it will be created. Additional keyword parameters are discussed -below. Cache objects are thread-safe and may be shared between threads. Two -Cache objects may also reference the same directory from separate threads or -processes. In this way, they are also process-safe and support cross-process -communication. + >>> cache.close() + >>> with Cache(cache.directory) as reference: + ... reference.set('key', 'value') + True -When created, Cache objects open and maintain a file handle. As such, they do -not survive process forking but they may be serialized using Pickle. 
Each -thread that accesses a cache is also responsible for calling :meth:`close -` on the cache. You can use a Cache reference in a -`with` statement to safeguard calling :meth:`close `. +Closed Cache objects will automatically re-open when accessed. But opening +Cache objects is relatively slow, and since all operations are atomic, may be +safely left open. >>> cache.close() - >>> with Cache('/tmp/mycachedir') as reference: - ... pass + >>> cache.get('key') # Automatically opens, but slower. + 'value' Set an item, get a value, and delete a key using the usual operators: - >>> cache = Cache('/tmp/mycachedir') - >>> cache[b'key'] = b'value' - >>> cache[b'key'] + >>> cache['key'] = 'value' + >>> cache['key'] 'value' - >>> b'key' in cache + >>> 'key' in cache True - >>> del cache[b'key'] + >>> del cache['key'] There's also a :meth:`set ` method with additional keyword parameters: `expire`, `read`, and `tag`. >>> from io import BytesIO - >>> cache.set(b'key', BytesIO('value'), expire=5, read=True, tag=u'data') + >>> cache.set('key', BytesIO(b'value'), expire=5, read=True, tag='data') True In the example above: the key expires in 5 seconds, the value is read as a @@ -109,16 +118,27 @@ file-like object, and tag metadata is stored with the key. Another method, :meth:`get ` supports querying extra information with `default`, `read`, `expire_time`, and `tag` keyword parameters. - >>> cache.get(b'key', default=b'', read=True, expire_time=True, tag=True) - (<_io.BufferedReader - name=u'/tmp/mycachedir/1d/6e/128a921c3b8a9027c1f69989f3ac.val'>, - 1457066214.784396, - u'data') + >>> result = cache.get('key', read=True, expire_time=True, tag=True) + >>> reader, timestamp, tag = result + >>> print(reader.read().decode()) + value + >>> type(timestamp).__name__ + 'float' + >>> print(tag) + data The return value is a tuple containing the value, expire time (seconds from epoch), and tag. Because we passed ``read=True`` the value is returned as a file-like object. +Use :meth:`touch <.Cache.touch>` to update the expiration time of an item in +the cache. + + >>> cache.touch('key', expire=None) + True + >>> cache.touch('does-not-exist', expire=1) + False + Like :meth:`set `, the method :meth:`add ` can be used to insert an item in the cache. The item is inserted only if the key is not already present. @@ -144,45 +164,58 @@ Increment and decrement methods also support a keyword parameter, `default`, which will be used for missing keys. When ``None``, incrementing or decrementing a missing key will raise a :exc:`KeyError`. - >>> cache.incr(u'alice') + >>> cache.incr('alice') 1 - >>> cache.decr(u'bob', default=-9) + >>> cache.decr('bob', default=-9) -10 - >>> cache.incr(u'carol', default=None) + >>> cache.incr('carol', default=None) Traceback (most recent call last): ... - KeyError: u'carol' + KeyError: 'carol' Increment and decrement operations are atomic and assume the value may be -stored in a SQLite column. Most builds that target machines with 64-bit pointer -widths will support 64-bit signed integers. +stored in a SQLite integer column. SQLite supports 64-bit signed integers. Like :meth:`delete ` and :meth:`get `, the method :meth:`pop ` can be used to delete an item in the cache and return its value. 
- >>> cache.pop(u'alice') + >>> cache.pop('alice') 1 - >>> cache.pop(u'dave', default=u'does not exist') - u'does not exist' - >>> cache.set(u'dave', 0, expire=None, tag=u'admin') - >>> cache.pop(u'dave', expire_time=True, tag=True) - (0, None, u'admin') + >>> cache.pop('dave', default='does not exist') + 'does not exist' + >>> cache.set('dave', 0, expire=None, tag='admin') + True + >>> result = cache.pop('dave', expire_time=True, tag=True) + >>> value, timestamp, tag = result + >>> value + 0 + >>> print(timestamp) + None + >>> print(tag) + admin The :meth:`pop ` operation is atomic and using :meth:`incr ` together is an accurate method for counting and dumping statistics in long-running systems. Unlike :meth:`get ` the `read` argument is not supported. -Another four methods remove items from the cache. +.. _tutorial-culling: + +Another four methods remove items from the cache:: + >>> cache.clear() + 3 >>> cache.reset('cull_limit', 0) # Disable automatic evictions. + 0 >>> for num in range(10): - ... cache.set(num, num, expire=0) # Expire immediately. + ... _ = cache.set(num, num, expire=1e-9) # Expire immediately. >>> len(cache) 10 >>> list(cache) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> import time + >>> time.sleep(1) >>> cache.expire() 10 @@ -196,8 +229,9 @@ items you must explicitly call :meth:`expire ` which works regardless of the :ref:`cull_limit `. >>> for num in range(100): - ... cache.set(num, num, tag=u'odd' if num % 2 else u'even') - >>> cache.evict(u'even') + ... _ = cache.set(num, num, tag='odd' if num % 2 else 'even') + >>> cache.evict('even') + 50 .. _tutorial-tag-index: @@ -206,10 +240,12 @@ tag. The default tag is ``None``. Tag values may be any of integer, float, string, bytes and None. To accelerate the eviction of items by tag, an index can be created. To do so, initialize the cache with ``tag_index=True``. - >>> cache = Cache('/tmp/mycachedir', tag_index=True) + >>> cache.clear() + 50 >>> for num in range(100): - ... cache.set(num, num, tag=(num % 2)) + ... _ = cache.set(num, num, tag=(num % 2)) >>> cache.evict(0) + 50 Likewise, the tag index may be created or dropped using methods:: @@ -229,37 +265,91 @@ removing expired items from the cache and then uses the eviction policy to remove items until the cache volume is less than the size limit. >>> cache.clear() + 50 >>> cache.reset('size_limit', int(1e6)) + 1000000 >>> cache.reset('cull_limit', 0) + 0 >>> for count in range(1000): - >>> cache[count] = b'A' * 1000 - >>> cache.volume() - 1437696 - >>> cache.cull() - 320 - >>> cache.volume() - 999424 + ... cache[count] = b'A' * 1000 + >>> cache.volume() > int(1e6) + True + >>> cache.cull() > 0 + True + >>> cache.volume() < int(1e6) + True Some users may defer all culling to a cron-like process by setting the -:ref:`cull_limit ` to zero and calling :meth:`cull -` to manually remove items. Like :meth:`evict +:ref:`cull_limit ` to zero and manually calling :meth:`cull +` to remove items. Like :meth:`evict ` and :meth:`expire `, calls to :meth:`cull ` will work regardless of the :ref:`cull_limit `. :meth:`Clear ` simply removes all items from the cache. - >>> cache.clear() + >>> cache.clear() > 0 + True Each of these methods is designed to work concurrent to others. None of them block readers or writers in other threads or processes. +Caches may be iterated by either insertion order or sorted order. The default +ordering uses insertion order. To iterate by sorted order, use :meth:`iterkeys +<.Cache.iterkeys>`. 
The sort order is determined by the database which makes it +valid only for `str`, `bytes`, `int`, and `float` data types. Other types of +keys will be serialized which is likely to have a meaningless sorted order. + + >>> for key in 'cab': + ... cache[key] = None + >>> list(cache) + ['c', 'a', 'b'] + >>> list(cache.iterkeys()) + ['a', 'b', 'c'] + >>> cache.peekitem() + ('b', None) + >>> cache.peekitem(last=False) + ('c', None) + +If only the first or last item in insertion order is desired then +:meth:`peekitem <.Cache.peekitem>` is more efficient than using iteration. + +Three additional methods use the sorted ordering of keys to maintain a +queue-like data structure within the cache. The :meth:`push <.Cache.push>`, +:meth:`pull <.Cache.pull>`, and :meth:`peek <.Cache.peek>` methods +automatically assign the key within the cache. + + >>> key = cache.push('first') + >>> print(key) + 500000000000000 + >>> cache[key] + 'first' + >>> _ = cache.push('second') + >>> _ = cache.push('zeroth', side='front') + >>> _, value = cache.peek() + >>> value + 'zeroth' + >>> key, value = cache.pull() + >>> print(key) + 499999999999999 + >>> value + 'zeroth' + +The `side` parameter supports access to either the ``'front'`` or ``'back'`` of +the cache. In addition, the `prefix` parameter can be used to maintain multiple +queue-like data structures within a single cache. When prefix is ``None``, +integer keys are used. Otherwise, string keys are used in the format +“prefix-integer”. Integer starts at 500 trillion. Like :meth:`set <.Cache.set>` +and :meth:`get <.Cache.get>`, methods :meth:`push <.Cache.push>`, :meth:`pull +<.Cache.pull>`, and :meth:`peek <.Cache.peek>` support cache metadata like the +expiration time and tag. + Lastly, three methods support metadata about the cache. The first is :meth:`volume ` which returns the estimated total size in bytes of the cache directory on disk. - >>> cache.volume() - 9216 + >>> cache.volume() < int(1e5) + True .. _tutorial-statistics: @@ -269,11 +359,12 @@ and misses. Cache statistics must first be enabled. >>> cache.stats(enable=True) (0, 0) >>> for num in range(100): - ... cache.set(num, num) + ... _ = cache.set(num, num) >>> for num in range(150): - ... cache.get(num) - >>> cache.stats(enable=False, reset=True) - (100, 50) # 100 hits, 50 misses + ... _ = cache.get(num) + >>> hits, misses = cache.stats(enable=False, reset=True) + >>> (hits, misses) + (100, 50) Cache statistics are useful when evaluating different :ref:`eviction policies `. By default, statistics are disabled as they @@ -281,12 +372,23 @@ incur an extra overhead on cache lookups. Increment and decrement operations are not counted in cache statistics. The third is :meth:`check ` which verifies cache -consistency. It can also fix inconsistencies and reclaim unused space. +consistency. It can also fix inconsistencies and reclaim unused space. The +return value is a list of warnings. - >>> cache.check(fix=True) - [] + >>> warnings = cache.check() -The return value is a list of warnings. +Caches do not automatically remove the underlying directory where keys and +values are stored. The cache is intended to be persistent and so must be +deleted manually. + + >>> cache.close() + >>> import shutil + >>> try: + ... shutil.rmtree(cache.directory) + ... except OSError: # Windows wonkiness + ... pass + +To permanently delete the cache, recursively remove the cache's directory. .. _tutorial-fanoutcache: @@ -302,34 +404,46 @@ suggested. This will depend on your scenario. The default value is 8. 
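Conceptually, every key is deterministically routed to a single shard, so two
writers only contend with each other when their keys happen to land in the same
shard. The routing below is a simplified sketch that assumes a hash-then-modulo
scheme; it is not FanoutCache's exact implementation.

.. code-block:: python

    import hashlib

    def pick_shard(key, shards=8):
        # Hash the key with a stable digest so every process agrees,
        # then map the digest onto one of the shards.
        digest = hashlib.md5(repr(key).encode('utf-8')).digest()
        return int.from_bytes(digest[:8], 'big') % shards

    pick_shard('user:42')  # Same key, same shard, in every process.
    pick_shard('user:43')  # A different key usually lands elsewhere.

Because the mapping is stable, reads always find the shard where the key was
written, and a write lock on one shard does not block writes to the others.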
Another parameter, `timeout`, sets a limit on how long to wait for database transactions. Transactions are used for every operation that writes to the -database. The `timeout` parameter is also present on -:class:`diskcache.Cache`. When a :exc:`diskcache.Timeout` error occurs in -:class:`Cache ` methods, the exception is raised to the -caller. In contrast, :class:`FanoutCache ` catches -timeout errors and aborts the operation. As a result, :meth:`set +database. When the timeout expires, a :exc:`diskcache.Timeout` error is raised +internally. This `timeout` parameter is also present on +:class:`diskcache.Cache`. When a :exc:`Timeout ` error +occurs in :class:`Cache ` methods, the exception may be raised +to the caller. In contrast, :class:`FanoutCache ` +catches all timeout errors and aborts the operation. As a result, :meth:`set ` and :meth:`delete ` -methods may silently fail. Most methods that handle :exc:`Timeout -` exceptions also include a `retry` keyword parameter -(default ``False``) to automatically repeat attempts that -timeout. :class:`FanoutCache ` will never raise a -:exc:`Timeout ` exception. The default `timeout` is 0.010 -(10 milliseconds). +methods may silently fail. + +Most methods that handle :exc:`Timeout ` exceptions also +include a `retry` keyword parameter (default ``False``) to automatically repeat +attempts that timeout. The mapping interface operators: :meth:`cache[key] +`, :meth:`cache[key] = value +`, and :meth:`del cache[key] +` automatically retry operations when +:exc:`Timeout ` errors occur. :class:`FanoutCache +` will never raise a :exc:`Timeout ` +exception. The default `timeout` is 0.010 (10 milliseconds). >>> from diskcache import FanoutCache - >>> cache = FanoutCache('/tmp/mycachedir', shards=4, timeout=1) + >>> cache = FanoutCache(shards=4, timeout=1) -The example above creates a cache in the local ``/tmp/mycachedir`` directory -with four shards and a one second timeout. Operations will attempt to abort if -they take longer than one second. The remaining API of :class:`FanoutCache -` matches :class:`Cache ` as described -above. +The example above creates a cache in a temporary directory with four shards and +a one second timeout. Operations will attempt to abort if they take longer than +one second. The remaining API of :class:`FanoutCache ` +matches :class:`Cache ` as described above. -:class:`FanoutCache ` adds an additional feature: -:meth:`memoizing ` cache decorator. The -decorator wraps a callable and caches arguments and return values. +The :class:`.FanoutCache` :ref:`size_limit ` is used as the total +size of the cache. The size limit of individual cache shards is the total size +divided by the number of shards. In the example above, the default total size +is one gigabyte and there are four shards so each cache shard has a size limit +of 256 megabytes. Items that are larger than the size limit are immediately +culled. + +Caches have an additional feature: :meth:`memoizing +` decorator. The decorator wraps a callable and +caches arguments and return values. >>> from diskcache import FanoutCache - >>> cache = FanoutCache('/tmp/diskcache/fanoutcache') + >>> cache = FanoutCache() >>> @cache.memoize(typed=True, expire=1, tag='fib') ... def fibonacci(number): ... if number == 0: @@ -338,14 +452,14 @@ decorator wraps a callable and caches arguments and return values. ... return 1 ... else: ... 
return fibonacci(number - 1) + fibonacci(number - 2) - >>> print(sum(fibonacci(number=value) for value in range(100))) + >>> print(sum(fibonacci(value) for value in range(100))) 573147844013817084100 The arguments to memoize are like those for `functools.lru_cache `_ and -:meth:`FanoutCache.set `. Remember to call -:meth:`memoize ` when decorating a callable. If -you forget, then a TypeError will occur. +:meth:`Cache.set <.Cache.set>`. Remember to call :meth:`memoize +<.FanoutCache.memoize>` when decorating a callable. If you forget, then a +TypeError will occur:: >>> @cache.memoize ... def test(): @@ -355,7 +469,7 @@ you forget, then a TypeError will occur. TypeError: name cannot be callable Observe the lack of parenthenses after :meth:`memoize -` above. +` above. .. _`Sharding`: https://en.wikipedia.org/wiki/Shard_(database_architecture) @@ -375,18 +489,23 @@ DjangoCache 'default': { 'BACKEND': 'diskcache.DjangoCache', 'LOCATION': '/path/to/cache/directory', - 'SHARDS': 4, - 'DATABASE_TIMEOUT': 1.0, + 'TIMEOUT': 300, + # ^-- Django setting for default timeout of each key. + 'SHARDS': 8, + 'DATABASE_TIMEOUT': 0.010, # 10 milliseconds + # ^-- Timeout for each DjangoCache database transaction. 'OPTIONS': { - 'size_limit': 2 ** 32 # 4 gigabytes + 'size_limit': 2 ** 30 # 1 gigabyte }, }, } As with :class:`FanoutCache ` above, these settings -create a Django-compatible cache with four shards and a one second timeout. You -can pass further settings via the ``OPTIONS`` mapping as shown in the Django -documentation. :class:`DjangoCache ` will never raise a +create a Django-compatible cache with eight shards and a 10ms timeout. You can +pass further settings via the ``OPTIONS`` mapping as shown in the Django +documentation. Only the ``BACKEND`` and ``LOCATION`` keys are necessary in the +above example. The other keys simply display their default +value. :class:`DjangoCache ` will never raise a :exc:`Timeout ` exception. But unlike :class:`FanoutCache `, the keyword parameter `retry` defaults to ``True`` for :class:`DjangoCache ` methods. @@ -416,7 +535,7 @@ they are guaranteed to be stored in files. The full path is available on the file handle in the `name` attribute. Remember to also include the `Content-Type` header if known. -.. _`Django documentation on caching`: https://docs.djangoproject.com/en/1.9/topics/cache/#the-low-level-cache-api +.. _`Django documentation on caching`: https://docs.djangoproject.com/en/3.2/topics/cache/#the-low-level-cache-api Deque ----- @@ -426,8 +545,9 @@ Deque `_-compatible double-ended queue. Deques are a generalization of stacks and queues with fast access and editing at both front and back sides. :class:`Deque -` objects inherit the benefits of the :class:`Cache -` objects but never evict items. +` objects use the :meth:`push <.Cache.push>`, :meth:`pull +<.Cache.pull>`, and :meth:`peek <.Cache.peek>` methods of :class:`Cache +<.Cache>` objects but never evict or expire items. >>> from diskcache import Deque >>> deque = Deque(range(5, 10)) @@ -438,18 +558,23 @@ access and editing at both front and back sides. :class:`Deque >>> deque.appendleft('foo') >>> len(deque) 4 - >>> deque.directory - '/tmp/...' + >>> type(deque.directory).__name__ + 'str' >>> other = Deque(directory=deque.directory) >>> len(other) 4 >>> other.popleft() 'foo' + >>> thing = Deque('abcde', maxlen=3) + >>> list(thing) + ['c', 'd', 'e'] :class:`Deque ` objects provide an efficient and safe means of cross-thread and cross-process communication. 
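For example, one process can feed work to another through a deque shared by
directory. The snippet below is a minimal sketch; the ``consume`` function and
the example task values are invented for illustration.

.. code-block:: python

    import multiprocessing as mp

    from diskcache import Deque

    def consume(directory):
        # Open the same deque in the child process by its directory.
        tasks = Deque(directory=directory)
        while True:
            try:
                task = tasks.popleft()
            except IndexError:
                break  # Deque is empty.
            print('processing', task)

    if __name__ == '__main__':
        tasks = Deque(range(5))  # Created in a temporary directory.
        worker = mp.Process(target=consume, args=(tasks.directory,))
        worker.start()
        worker.join()

The child opens the deque by its directory, pops items until the deque is
empty, and exits; both sides see a consistent view because every deque
operation is atomic.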
:class:`Deque ` objects are also useful in scenarios where contents should remain persistent or -limitations prohibit holding all items in memory at the same time. +limitations prohibit holding all items in memory at the same time. The deque +uses a fixed amount of memory regardless of the size or number of items stored +inside it. Index ----- @@ -459,8 +584,8 @@ Index `_ and `ordered dictionary `_ -interface. :class:`Index ` objects inherit the benefits of -:class:`Cache ` objects but never evict items. +interface. :class:`Index ` objects inherit all the benefits of +:class:`Cache ` objects but never evict or expire items. >>> from diskcache import Index >>> index = Index([('a', 1), ('b', 2), ('c', 3)]) @@ -480,7 +605,86 @@ interface. :class:`Index ` objects inherit the benefits of :class:`Index ` objects provide an efficient and safe means of cross-thread and cross-process communication. :class:`Index ` objects are also useful in scenarios where contents should remain persistent or -limitations prohibit holding all items in memory at the same time. +limitations prohibit holding all items in memory at the same time. The index +uses a fixed amount of memory regardless of the size or number of items stored +inside it. + +.. _tutorial-transactions: + +Transactions +------------ + +Transactions are implemented by the :class:`.Cache`, :class:`.Deque`, and +:class:`.Index` data types and support consistency and improved +performance. Use transactions to guarantee a group of operations occur +atomically. For example, to calculate a running average, the total and count +could be incremented together:: + + >>> with cache.transact(): + ... total = cache.incr('total', 123.45) + ... count = cache.incr('count') + >>> total + 123.45 + >>> count + 1 + +And to calculate the average, the values could be retrieved together: + + >>> with cache.transact(): + ... total = cache.get('total') + ... count = cache.get('count') + >>> average = None if count == 0 else total / count + >>> average + 123.45 + +Keep transactions as short as possible because within a transaction, no other +writes may occur to the cache. Every write operation uses a transaction and +transactions may be nested to improve performance. For example, a possible +implementation to set many items within the cache:: + + >>> def set_many(cache, mapping): + ... with cache.transact(): + ... for key, value in mapping.items(): + ... cache[key] = value + +By grouping all operations in a single transaction, performance may improve two +to five times. But be careful, a large mapping will block other concurrent +writers. + +Transactions are not implemented by :class:`.FanoutCache` and +:class:`.DjangoCache` due to key sharding. Instead, a cache shard with +transaction support may be requested. + + >>> fanout_cache = FanoutCache() + >>> tutorial_cache = fanout_cache.cache('tutorial') + >>> username_queue = fanout_cache.deque('usernames') + >>> url_to_response = fanout_cache.index('responses') + +The cache shard exists in a subdirectory of the fanout-cache with the given +name. + +.. _tutorial-recipes: + +Recipes +------- + +:doc:`DiskCache ` includes a few synchronization recipes for +cross-thread and cross-process communication: + +* :class:`.Averager` -- maintains a running average like that shown above. +* :class:`.Lock`, :class:`.RLock`, and :class:`.BoundedSemaphore` -- recipes + for synchronization around critical sections like those found in Python's + `threading`_ and `multiprocessing`_ modules. 
+* :func:`throttle <.throttle>` -- function decorator to rate-limit calls to a + function. +* :func:`barrier <.barrier>` -- function decorator to synchronize calls to a + function. +* :func:`memoize_stampede <.memoize_stampede>` -- memoizing function decorator + with cache stampede protection. Read :doc:`case-study-landing-page-caching` + for a comparison of memoization strategies. + +.. _threading: https://docs.python.org/3/library/threading.html +.. _multiprocessing: https://docs.python.org/3/library/multiprocessing.html .. _tutorial-settings: @@ -515,11 +719,11 @@ are updated lazily. Prefer idioms like :meth:`len ` rather than using :meth:`reset ` directly. - >>> cache = Cache('/tmp/mycachedir', size_limit=int(4e9)) - >>> cache.size_limit + >>> cache = Cache(size_limit=int(4e9)) + >>> print(cache.size_limit) 4000000000 >>> cache.disk_min_file_size - 1024 + 32768 >>> cache.reset('cull_limit', 0) # Disable automatic evictions. 0 >>> cache.set(b'key', 1.234) @@ -534,7 +738,7 @@ these may be specified when initializing the :ref:`Cache `. Changing these values will update the unprefixed attribute on the :class:`Disk ` object. -* `disk_min_file_size`, default one kilobyte. The minimum size to store a value +* `disk_min_file_size`, default 32 kilobytes. The minimum size to store a value in a file. * `disk_pickle_protocol`, default highest Pickle protocol. The Pickle protocol to use for data types that are not natively supported. @@ -561,34 +765,39 @@ accessible at :data:`diskcache.DEFAULT_SETTINGS`. Eviction Policies ----------------- -:doc:`DiskCache ` supports three eviction policies each with different +:doc:`DiskCache ` supports four eviction policies each with different tradeoffs for accessing and storing items. -* `Least Recently Stored` is the default. Every cache item records the time it - was stored in the cache. This policy adds an index to that field. On access, - no update is required. Keys are evicted starting with the oldest stored - keys. As :doc:`DiskCache ` was intended for large caches (gigabytes) - this policy usually works well enough in practice. -* `Least Recently Used` is the most commonly used policy. An index is added to - the access time field stored in the cache database. On every access, the +* ``"least-recently-stored"`` is the default. Every cache item records the time + it was stored in the cache. This policy adds an index to that field. On + access, no update is required. Keys are evicted starting with the oldest + stored keys. As :doc:`DiskCache ` was intended for large caches + (gigabytes) this policy usually works well enough in practice. +* ``"least-recently-used"`` is the most commonly used policy. An index is added + to the access time field stored in the cache database. On every access, the field is updated. This makes every access into a read and write which slows accesses. -* `Least Frequently Used` works well in some cases. An index is added to the - access count field stored in the cache database. On every access, the field - is incremented. Every access therefore requires writing the database which - slows accesses. +* ``"least-frequently-used"`` works well in some cases. An index is added to + the access count field stored in the cache database. On every access, the + field is incremented. Every access therefore requires writing the database + which slows accesses. +* ``"none"`` disables cache evictions. Caches will grow without bound. Cache + items will still be lazily removed if they expire. 
The persistent data types, + :class:`.Deque` and :class:`.Index`, use the ``"none"`` eviction policy. For + :ref:`lazy culling ` use the :ref:`cull_limit ` + setting instead. All clients accessing the cache are expected to use the same eviction policy. The policy can be set during initialization using a keyword argument. - >>> cache = Cache('/tmp/mydir') - >>> cache.eviction_policy - u'least-recently-stored' - >>> cache = Cache('/tmp/mydir', eviction_policy=u'least-frequently-used') - >>> cache.eviction_policy - u'least-frequently-used' - >>> cache.reset('eviction_policy', u'least-recently-used') - u'least-recently-used' + >>> cache = Cache() + >>> print(cache.eviction_policy) + least-recently-stored + >>> cache = Cache(eviction_policy='least-frequently-used') + >>> print(cache.eviction_policy) + least-frequently-used + >>> print(cache.reset('eviction_policy', 'least-recently-used')) + least-recently-used Though the eviction policy is changed, the previously created indexes will not be dropped. Prefer to always specify the eviction policy as a keyword argument @@ -607,39 +816,38 @@ database while values are sometimes stored separately in files. To customize serialization, you may pass in a :class:`Disk ` subclass to initialize the cache. All clients accessing the cache are expected to use the same serialization. The default implementation uses Pickle and the -example below uses compressed JSON. +example below uses compressed JSON, available for convenience as +:class:`JSONDisk `. .. code-block:: python - import json, zlib - class JSONDisk(diskcache.Disk): def __init__(self, directory, compress_level=1, **kwargs): self.compress_level = compress_level - super(JSONDisk, self).__init__(directory, **kwargs) + super().__init__(directory, **kwargs) def put(self, key): json_bytes = json.dumps(key).encode('utf-8') data = zlib.compress(json_bytes, self.compress_level) - return super(JSONDisk, self).put(data) + return super().put(data) def get(self, key, raw): - data = super(JSONDisk, self).get(key, raw) + data = super().get(key, raw) return json.loads(zlib.decompress(data).decode('utf-8')) - def store(self, value, read): + def store(self, value, read, key=UNKNOWN): if not read: json_bytes = json.dumps(value).encode('utf-8') value = zlib.compress(json_bytes, self.compress_level) - return super(JSONDisk, self).store(value, read) + return super().store(value, read, key=key) def fetch(self, mode, filename, value, read): - data = super(JSONDisk, self).fetch(mode, filename, value, read) + data = super().fetch(mode, filename, value, read) if not read: data = json.loads(zlib.decompress(data).decode('utf-8')) return data - with Cache('/tmp/dir', disk=JSONDisk, disk_compress_level=6) as cache: + with Cache(disk=JSONDisk, disk_compress_level=6) as cache: pass Four data types can be stored natively in the cache metadata database: @@ -656,24 +864,60 @@ protocol`_ is not used. Neither the `__hash__` nor `__eq__` methods are used for lookups. Instead lookups depend on the serialization method defined by :class:`Disk ` objects. For strings, bytes, integers, and floats, equality matches Python's definition. But large integers and all other -types will be converted to bytes using pickling and the bytes representation -will define equality. - -:doc:`DiskCache ` uses SQLite to synchronize database access between -threads and processes and as such inherits all SQLite caveats. Most notably -SQLite is `not recommended`_ for use with Network File System (NFS) mounts. 
For -this reason, :doc:`DiskCache ` currently `performs poorly`_ on `Python -Anywhere`_. Users have also reported issues running inside of `Parallels`_ -shared folders. - +types will be converted to bytes and the bytes representation will define +equality. + +The default :class:`diskcache.Disk` serialization uses pickling for both keys +and values. Unfortunately, pickling produces inconsistencies sometimes when +applied to container data types like tuples. Two equal tuples may serialize to +different bytes objects using pickle. The likelihood of differences is reduced +by using `pickletools.optimize` but still inconsistencies occur (`#54`_). The +inconsistent serialized pickle values is particularly problematic when applied +to the key in the cache. Consider using an alternative Disk type, like +:class:`JSONDisk `, for consistent serialization of keys. + +SQLite is used to synchronize database access between threads and processes and +as such inherits all SQLite caveats. Most notably SQLite is `not recommended`_ +for use with Network File System (NFS) mounts. For this reason, :doc:`DiskCache +` currently `performs poorly`_ on `Python Anywhere`_. Users have also +reported issues running inside of `Parallels`_ shared folders. + +When the disk or database is full, a :exc:`sqlite3.OperationalError` will be +raised from any method that attempts to write data. Read operations will still +succeed so long as they do not cause any write (as might occur if cache +statistics are being recorded). + +Asynchronous support using Python's ``async`` and ``await`` keywords and +`asyncio`_ module is blocked by a lack of support in the underlying SQLite +module. But it is possible to run :doc:`DiskCache ` methods in a +thread-pool executor asynchronously. For example:: + + import asyncio + + async def set_async(key, val): + loop = asyncio.get_running_loop() + future = loop.run_in_executor(None, cache.set, key, val) + result = await future + return result + + asyncio.run(set_async('test-key', 'test-value')) + +The cache :meth:`volume ` is based on the size of the +database that stores metadata and the size of the values stored in files. It +does not account the size of directories themselves or other filesystem +metadata. If directory count or size is a concern then consider implementing an +alternative :class:`Disk `. + +.. _`#54`: https://github.com/grantjenks/python-diskcache/issues/54 .. _`hash protocol`: https://docs.python.org/library/functions.html#hash .. _`not recommended`: https://www.sqlite.org/faq.html#q5 .. _`performs poorly`: https://www.pythonanywhere.com/forums/topic/1847/ .. _`Python Anywhere`: https://www.pythonanywhere.com/ .. _`Parallels`: https://www.parallels.com/ +.. _`asyncio`: https://docs.python.org/3/library/asyncio.html -Implementation Notes --------------------- +Implementation +-------------- :doc:`DiskCache ` is mostly built on SQLite and the filesystem. Some techniques used to improve performance: diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..053b283 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,4 @@ +[mypy] + +[mypy-django.*] +ignore_missing_imports = True diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..6149361 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,23 @@ +-e . 
+blue +coverage +django==4.2.* +django_redis +doc8 +flake8 +ipython +jedi +pickleDB +pylibmc +pylint +pytest +pytest-cov +pytest-django +pytest-env +pytest-xdist +rstcheck +sphinx +sqlitedict +tox +twine +wheel diff --git a/requirements.txt b/requirements.txt index f59ec75..d6e1198 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1 @@ -mock==1.3.0 -nose==1.3.7 -django>=1.11,<1.12 +-e . diff --git a/setup.py b/setup.py index 0f00554..841dfb9 100644 --- a/setup.py +++ b/setup.py @@ -1,36 +1,44 @@ -import io -from setuptools import setup, find_packages +from setuptools import setup from setuptools.command.test import test as TestCommand -import sys import diskcache + class Tox(TestCommand): def finalize_options(self): TestCommand.finalize_options(self) self.test_args = [] self.test_suite = True + def run_tests(self): import tox + errno = tox.cmdline(self.test_args) - sys.exit(errno) + exit(errno) -with io.open('README.rst', encoding='UTF-8') as reader: + +with open('README.rst', encoding='utf-8') as reader: readme = reader.read() setup( - name='diskcache', + name=diskcache.__title__, version=diskcache.__version__, - description='Disk and file backed cache.', + description='Disk Cache -- Disk and file backed persistent cache.', long_description=readme, author='Grant Jenks', author_email='contact@grantjenks.com', url='http://www.grantjenks.com/docs/diskcache/', - packages=find_packages(exclude=('tests', 'docs')), - package_data={'': ['LICENSE', 'README.rst']}, + project_urls={ + 'Documentation': 'http://www.grantjenks.com/docs/diskcache/', + 'Funding': 'https://gum.co/diskcache', + 'Source': 'https://github.com/grantjenks/python-diskcache', + 'Tracker': 'https://github.com/grantjenks/python-diskcache/issues', + }, + license='Apache 2.0', + packages=['diskcache'], tests_require=['tox'], cmdclass={'test': Tox}, - license='Apache 2.0', + python_requires='>=3', install_requires=[], classifiers=( 'Development Status :: 5 - Production/Stable', @@ -38,13 +46,12 @@ def run_tests(self): 'License :: OSI Approved :: Apache Software License', 'Natural Language :: English', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', ), ) diff --git a/tests/benchmark_core.py b/tests/benchmark_core.py index 1811de0..7d64595 100644 --- a/tests/benchmark_core.py +++ b/tests/benchmark_core.py @@ -3,26 +3,17 @@ $ export PYTHONPATH=/Users/grantj/repos/python-diskcache $ python tests/benchmark_core.py -p 1 > tests/timings_core_p1.txt $ python tests/benchmark_core.py -p 8 > tests/timings_core_p8.txt - """ -from __future__ import print_function - import collections as co import multiprocessing as mp import os +import pickle import random import shutil -import sys import time import warnings -if sys.hexversion < 0x03000000: - range = xrange - import cPickle as pickle -else: - import pickle - from utils import display PROCS = 8 @@ -37,21 +28,32 @@ # Disk Cache Benchmarks ############################################################################### -import diskcache +import diskcache # noqa -caches.append(('diskcache.Cache', 
diskcache.Cache, ('tmp',), {},)) -caches.append(( - 'diskcache.FanoutCache(shards=4, timeout=1.0)', - diskcache.FanoutCache, - ('tmp',), - {'shards': 4, 'timeout': 1.0} -)) -caches.append(( - 'diskcache.FanoutCache(shards=8, timeout=0.010)', - diskcache.FanoutCache, - ('tmp',), - {'shards': 8, 'timeout': 0.010} -)) +caches.append( + ( + 'diskcache.Cache', + diskcache.Cache, + ('tmp',), + {}, + ) +) +caches.append( + ( + 'diskcache.FanoutCache(shards=4, timeout=1.0)', + diskcache.FanoutCache, + ('tmp',), + {'shards': 4, 'timeout': 1.0}, + ) +) +caches.append( + ( + 'diskcache.FanoutCache(shards=8, timeout=0.010)', + diskcache.FanoutCache, + ('tmp',), + {'shards': 8, 'timeout': 0.010}, + ) +) ############################################################################### @@ -61,12 +63,17 @@ try: import pylibmc - caches.append(( - 'pylibmc.Client', - pylibmc.Client, - (['127.0.0.1'],), - {'binary': True, 'behaviors': {'tcp_nodelay': True, 'ketama': True}}, - )) + caches.append( + ( + 'pylibmc.Client', + pylibmc.Client, + (['127.0.0.1'],), + { + 'binary': True, + 'behaviors': {'tcp_nodelay': True, 'ketama': True}, + }, + ) + ) except ImportError: warnings.warn('skipping pylibmc') @@ -78,12 +85,14 @@ try: import redis - caches.append(( - 'redis.StrictRedis', - redis.StrictRedis, - (), - {'host': 'localhost', 'port': 6379, 'db': 0}, - )) + caches.append( + ( + 'redis.StrictRedis', + redis.StrictRedis, + (), + {'host': 'localhost', 'port': 6379, 'db': 0}, + ) + ) except ImportError: warnings.warn('skipping redis') @@ -91,7 +100,7 @@ def worker(num, kind, args, kwargs): random.seed(num) - time.sleep(0.01) # Let other processes start. + time.sleep(0.01) # Let other processes start. obj = kind(*args, **kwargs) @@ -112,13 +121,13 @@ def worker(num, kind, args, kwargs): start = time.time() result = obj.set(key, value) end = time.time() - miss = result == False + miss = result is False action = 'set' else: start = time.time() result = obj.delete(key) end = time.time() - miss = result == False + miss = result is False action = 'delete' if count > WARMUP: @@ -143,7 +152,7 @@ def dispatch(): try: obj.close() - except: + except Exception: pass processes = [ @@ -180,19 +189,31 @@ def dispatch(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - '-p', '--processes', type=int, default=PROCS, + '-p', + '--processes', + type=int, + default=PROCS, help='Number of processes to start', ) parser.add_argument( - '-n', '--operations', type=float, default=OPS, + '-n', + '--operations', + type=float, + default=OPS, help='Number of operations to perform', ) parser.add_argument( - '-r', '--range', type=int, default=RANGE, + '-r', + '--range', + type=int, + default=RANGE, help='Range of keys', ) parser.add_argument( - '-w', '--warmup', type=float, default=WARMUP, + '-w', + '--warmup', + type=float, + default=WARMUP, help='Number of warmup operations before timings', ) diff --git a/tests/benchmark_djangocache.py b/tests/benchmark_djangocache.py index 4a0c81b..61a80bf 100644 --- a/tests/benchmark_djangocache.py +++ b/tests/benchmark_djangocache.py @@ -2,26 +2,15 @@ $ export PYTHONPATH=/Users/grantj/repos/python-diskcache $ python tests/benchmark_djangocache.py > tests/timings_djangocache.txt - - """ -from __future__ import print_function - import collections as co import multiprocessing as mp import os +import pickle import random import shutil -import sys import time -import warnings - -if sys.hexversion < 0x03000000: - range = xrange - import cPickle as pickle -else: - import pickle from 
utils import display @@ -32,8 +21,9 @@ def setup(): - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tests.settings') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tests.settings_benchmark') import django + django.setup() @@ -48,7 +38,7 @@ def worker(num, name): timings = co.defaultdict(list) - time.sleep(0.01) # Let other processes start. + time.sleep(0.01) # Let other processes start. for count in range(OPS): key = str(random.randrange(RANGE)).encode('utf-8') @@ -65,13 +55,13 @@ def worker(num, name): start = time.time() result = obj.set(key, value) end = time.time() - miss = result == False + miss = result is False action = 'set' else: start = time.time() result = obj.delete(key) end = time.time() - miss = result == False + miss = result is False action = 'delete' if count > WARMUP: @@ -97,14 +87,14 @@ def prepare(name): try: obj.close() - except: + except Exception: pass def dispatch(): setup() - from django.core.cache import caches + from django.core.cache import caches # noqa for name in ['locmem', 'memcached', 'redis', 'diskcache', 'filebased']: shutil.rmtree('tmp', ignore_errors=True) @@ -147,19 +137,31 @@ def dispatch(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - '-p', '--processes', type=int, default=PROCS, + '-p', + '--processes', + type=int, + default=PROCS, help='Number of processes to start', ) parser.add_argument( - '-n', '--operations', type=float, default=OPS, + '-n', + '--operations', + type=float, + default=OPS, help='Number of operations to perform', ) parser.add_argument( - '-r', '--range', type=int, default=RANGE, + '-r', + '--range', + type=int, + default=RANGE, help='Range of keys', ) parser.add_argument( - '-w', '--warmup', type=float, default=WARMUP, + '-w', + '--warmup', + type=float, + default=WARMUP, help='Number of warmup operations before timings', ) diff --git a/tests/benchmark_glob.py b/tests/benchmark_glob.py index 0402ef8..7da5fd3 100644 --- a/tests/benchmark_glob.py +++ b/tests/benchmark_glob.py @@ -1,6 +1,4 @@ -"Benchmark glob.glob1 as used by django.core.cache.backends.filebased." - -from __future__ import print_function +"""Benchmark glob.glob1 as used by django.core.cache.backends.filebased.""" import os import os.path as op @@ -24,15 +22,13 @@ print(template % ('Count', 'Time')) print(' '.join(['=' * size] * len(cols))) -for count in [10 ** exp for exp in range(6)]: +for count in [10**exp for exp in range(6)]: for value in range(count): with open(op.join('tmp', '%s.tmp' % value), 'wb') as writer: pass - + delta = timeit.timeit( - stmt="glob.glob1('tmp', '*.tmp')", - setup='import glob', - number=100 + stmt="glob.glob1('tmp', '*.tmp')", setup='import glob', number=100 ) print(template % (count, secs(delta))) diff --git a/tests/benchmark_incr.py b/tests/benchmark_incr.py index 4a01628..4f758aa 100644 --- a/tests/benchmark_incr.py +++ b/tests/benchmark_incr.py @@ -1,9 +1,6 @@ """Benchmark cache.incr method. - """ -from __future__ import print_function - import json import multiprocessing as mp import shutil @@ -18,7 +15,7 @@ def worker(num): - "Rapidly increment key and time operation." + """Rapidly increment key and time operation.""" time.sleep(0.1) # Let other workers start. cache = dc.Cache('tmp') @@ -35,7 +32,7 @@ def worker(num): def main(): - "Run workers and print percentile results." 
+ """Run workers and print percentile results.""" shutil.rmtree('tmp', ignore_errors=True) processes = [ diff --git a/tests/benchmark_kv_store.py b/tests/benchmark_kv_store.py new file mode 100644 index 0000000..7015470 --- /dev/null +++ b/tests/benchmark_kv_store.py @@ -0,0 +1,77 @@ +"""Benchmarking Key-Value Stores + +$ python -m IPython tests/benchmark_kv_store.py +""" + +from IPython import get_ipython + +import diskcache + +ipython = get_ipython() +assert ipython is not None, 'No IPython! Run with $ ipython ...' + +value = 'value' + +print('diskcache set') +dc = diskcache.FanoutCache('/tmp/diskcache') +ipython.magic("timeit -n 100 -r 7 dc['key'] = value") +print('diskcache get') +ipython.magic("timeit -n 100 -r 7 dc['key']") +print('diskcache set/delete') +ipython.magic("timeit -n 100 -r 7 dc['key'] = value; del dc['key']") + +try: + import dbm.gnu # Only trust GNU DBM +except ImportError: + print('Error: Cannot import dbm.gnu') + print('Error: Skipping import shelve') +else: + print('dbm set') + d = dbm.gnu.open('/tmp/dbm', 'c') + ipython.magic("timeit -n 100 -r 7 d['key'] = value; d.sync()") + print('dbm get') + ipython.magic("timeit -n 100 -r 7 d['key']") + print('dbm set/delete') + ipython.magic( + "timeit -n 100 -r 7 d['key'] = value; d.sync(); del d['key']; d.sync()" + ) + + import shelve + + print('shelve set') + s = shelve.open('/tmp/shelve') + ipython.magic("timeit -n 100 -r 7 s['key'] = value; s.sync()") + print('shelve get') + ipython.magic("timeit -n 100 -r 7 s['key']") + print('shelve set/delete') + ipython.magic( + "timeit -n 100 -r 7 s['key'] = value; s.sync(); del s['key']; s.sync()" + ) + +try: + import sqlitedict +except ImportError: + print('Error: Cannot import sqlitedict') +else: + print('sqlitedict set') + sd = sqlitedict.SqliteDict('/tmp/sqlitedict', autocommit=True) + ipython.magic("timeit -n 100 -r 7 sd['key'] = value") + print('sqlitedict get') + ipython.magic("timeit -n 100 -r 7 sd['key']") + print('sqlitedict set/delete') + ipython.magic("timeit -n 100 -r 7 sd['key'] = value; del sd['key']") + +try: + import pickledb +except ImportError: + print('Error: Cannot import pickledb') +else: + print('pickledb set') + p = pickledb.load('/tmp/pickledb', True) + ipython.magic("timeit -n 100 -r 7 p['key'] = value") + print('pickledb get') + ipython.magic( + "timeit -n 100 -r 7 p = pickledb.load('/tmp/pickledb', True); p['key']" + ) + print('pickledb set/delete') + ipython.magic("timeit -n 100 -r 7 p['key'] = value; del p['key']") diff --git a/tests/issue_109.py b/tests/issue_109.py new file mode 100644 index 0000000..a649c58 --- /dev/null +++ b/tests/issue_109.py @@ -0,0 +1,51 @@ +"""Benchmark for Issue #109 +""" + +import time + +import diskcache as dc + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--cache-dir', default='/tmp/test') + parser.add_argument('--iterations', type=int, default=100) + parser.add_argument('--sleep', type=float, default=0.1) + parser.add_argument('--size', type=int, default=25) + args = parser.parse_args() + + data = dc.FanoutCache(args.cache_dir) + delays = [] + values = {str(num): num for num in range(args.size)} + iterations = args.iterations + + for i in range(args.iterations): + print(f'Iteration {i + 1}/{iterations}', end='\r') + time.sleep(args.sleep) + for key, value in values.items(): + start = time.monotonic() + data[key] = value + stop = time.monotonic() + diff = stop - start + delays.append(diff) + + # Discard warmup delays, first two iterations. 
+ del delays[: (len(values) * 2)] + + # Convert seconds to microseconds. + delays = sorted(delay * 1e6 for delay in delays) + + # Display performance. + print() + print(f'Total #: {len(delays)}') + print(f'Min delay (us): {delays[0]:>8.3f}') + print(f'50th %ile (us): {delays[int(len(delays) * 0.50)]:>8.3f}') + print(f'90th %ile (us): {delays[int(len(delays) * 0.90)]:>8.3f}') + print(f'99th %ile (us): {delays[int(len(delays) * 0.99)]:>8.3f}') + print(f'Max delay (us): {delays[-1]:>8.3f}') + + +if __name__ == '__main__': + main() diff --git a/tests/issue_85.py b/tests/issue_85.py new file mode 100644 index 0000000..cb8789b --- /dev/null +++ b/tests/issue_85.py @@ -0,0 +1,142 @@ +"""Test Script for Issue #85 + +$ export PYTHONPATH=`pwd` +$ python tests/issue_85.py +""" + +import collections +import os +import random +import shutil +import sqlite3 +import threading +import time + +import django + + +def remove_cache_dir(): + print('REMOVING CACHE DIRECTORY') + shutil.rmtree('.cache', ignore_errors=True) + + +def init_django(): + global shard + print('INITIALIZING DJANGO') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tests.settings') + django.setup() + from django.core.cache import cache + + shard = cache._cache._shards[0] + + +def multi_threading_init_test(): + print('RUNNING MULTI-THREADING INIT TEST') + from django.core.cache import cache + + def run(): + cache.get('key') + + threads = [threading.Thread(target=run) for _ in range(50)] + _ = [thread.start() for thread in threads] + _ = [thread.join() for thread in threads] + + +def show_sqlite_compile_options(): + print('SQLITE COMPILE OPTIONS') + options = shard._sql('pragma compile_options').fetchall() + print('\n'.join(val for val, in options)) + + +def create_data_table(): + print('CREATING DATA TABLE') + shard._con.execute('create table data (x)') + nums = [(num,) for num in range(1000)] + shard._con.executemany('insert into data values (?)', nums) + + +commands = { + 'begin/read/write': [ + 'BEGIN', + 'SELECT MAX(x) FROM data', + 'UPDATE data SET x = x + 1', + 'COMMIT', + ], + 'begin/write/read': [ + 'BEGIN', + 'UPDATE data SET x = x + 1', + 'SELECT MAX(x) FROM data', + 'COMMIT', + ], + 'begin immediate/read/write': [ + 'BEGIN IMMEDIATE', + 'SELECT MAX(x) FROM data', + 'UPDATE data SET x = x + 1', + 'COMMIT', + ], + 'begin immediate/write/read': [ + 'BEGIN IMMEDIATE', + 'UPDATE data SET x = x + 1', + 'SELECT MAX(x) FROM data', + 'COMMIT', + ], + 'begin exclusive/read/write': [ + 'BEGIN EXCLUSIVE', + 'SELECT MAX(x) FROM data', + 'UPDATE data SET x = x + 1', + 'COMMIT', + ], + 'begin exclusive/write/read': [ + 'BEGIN EXCLUSIVE', + 'UPDATE data SET x = x + 1', + 'SELECT MAX(x) FROM data', + 'COMMIT', + ], +} + + +values = collections.deque() + + +def run(statements): + ident = threading.get_ident() + try: + for index, statement in enumerate(statements): + if index == (len(statements) - 1): + values.append(('COMMIT', ident)) + time.sleep(random.random() / 10.0) + shard._sql(statement) + if index == 0: + values.append(('BEGIN', ident)) + except sqlite3.OperationalError: + values.append(('ERROR', ident)) + + +def test_transaction_errors(): + for key, statements in commands.items(): + print(f'RUNNING {key}') + values.clear() + threads = [] + for _ in range(100): + thread = threading.Thread(target=run, args=(statements,)) + threads.append(thread) + _ = [thread.start() for thread in threads] + _ = [thread.join() for thread in threads] + errors = [pair for pair in values if pair[0] == 'ERROR'] + begins = [pair for pair in values if pair[0] 
== 'BEGIN'] + commits = [pair for pair in values if pair[0] == 'COMMIT'] + print('Error count:', len(errors)) + print('Begin count:', len(begins)) + print('Commit count:', len(commits)) + begin_idents = [ident for _, ident in begins] + commit_idents = [ident for _, ident in commits] + print('Serialized:', begin_idents == commit_idents) + + +if __name__ == '__main__': + remove_cache_dir() + init_django() + multi_threading_init_test() + show_sqlite_compile_options() + create_data_table() + test_transaction_errors() diff --git a/tests/models.py b/tests/models.py index 349fd87..a546822 100644 --- a/tests/models.py +++ b/tests/models.py @@ -10,4 +10,6 @@ def expensive_calculation(): class Poll(models.Model): question = models.CharField(max_length=200) answer = models.CharField(max_length=200) - pub_date = models.DateTimeField('date published', default=expensive_calculation) + pub_date = models.DateTimeField( + 'date published', default=expensive_calculation + ) diff --git a/tests/plot.py b/tests/plot.py index d8d1be0..fcac0bc 100644 --- a/tests/plot.py +++ b/tests/plot.py @@ -2,18 +2,18 @@ $ export PYTHONPATH=/Users/grantj/repos/python-diskcache $ python tests/plot.py --show tests/timings_core_p1.txt - """ import argparse import collections as co -import matplotlib.pyplot as plt import re import sys +import matplotlib.pyplot as plt + def parse_timing(timing, limit): - "Parse timing." + """Parse timing.""" if timing.endswith('ms'): value = float(timing[:-2]) * 1e-3 elif timing.endswith('us'): @@ -25,12 +25,12 @@ def parse_timing(timing, limit): def parse_row(row, line): - "Parse row." + """Parse row.""" return [val.strip() for val in row.match(line).groups()] def parse_data(infile): - "Parse data from `infile`." + """Parse data from `infile`.""" blocks = re.compile(' '.join(['=' * 9] * 8)) dashes = re.compile('^-{79}$') title = re.compile('^Timings for (.*)$') @@ -47,7 +47,7 @@ def parse_data(infile): if blocks.match(line): try: name = title.match(lines[index + 1]).group(1) - except: + except Exception: index += 1 continue @@ -82,7 +82,7 @@ def parse_data(infile): def make_plot(data, action, save=False, show=False, limit=0.005): - "Make plot." 
+ """Make plot.""" fig, ax = plt.subplots(figsize=(8, 10)) colors = ['#ff7f00', '#377eb8', '#4daf4a', '#984ea3', '#e41a1c'] width = 0.15 @@ -93,12 +93,17 @@ def make_plot(data, action, save=False, show=False, limit=0.005): bars = [] for pos, (name, color) in enumerate(zip(names, colors)): - bars.append(ax.bar( - [val + pos * width for val in index], - [parse_timing(data[name][action][tick], limit) for tick in ticks], - width, - color=color, - )) + bars.append( + ax.bar( + [val + pos * width for val in index], + [ + parse_timing(data[name][action][tick], limit) + for tick in ticks + ], + width, + color=color, + ) + ) ax.set_ylabel('Time (microseconds)') ax.set_title('"%s" Time vs Percentile' % action) @@ -106,12 +111,14 @@ def make_plot(data, action, save=False, show=False, limit=0.005): ax.set_xticklabels(ticks) box = ax.get_position() - ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width, box.height * 0.8]) + ax.set_position( + [box.x0, box.y0 + box.height * 0.2, box.width, box.height * 0.8] + ) ax.legend( [bar[0] for bar in bars], names, loc='lower center', - bbox_to_anchor=(0.5, -0.25) + bbox_to_anchor=(0.5, -0.25), ) if show: diff --git a/tests/plot_early_recompute.py b/tests/plot_early_recompute.py new file mode 100644 index 0000000..1508c45 --- /dev/null +++ b/tests/plot_early_recompute.py @@ -0,0 +1,176 @@ +"""Early Recomputation Measurements +""" + +import functools as ft +import multiprocessing.pool +import shutil +import threading +import time + +import diskcache as dc + + +def make_timer(times): + """Make a decorator which accumulates (start, end) in `times` for function + calls. + + """ + lock = threading.Lock() + + def timer(func): + @ft.wraps(func) + def wrapper(*args, **kwargs): + start = time.time() + func(*args, **kwargs) + pair = start, time.time() + with lock: + times.append(pair) + + return wrapper + + return timer + + +def make_worker(times, delay=0.2): + """Make a worker which accumulates (start, end) in `times` and sleeps for + `delay` seconds. + + """ + + @make_timer(times) + def worker(): + time.sleep(delay) + + return worker + + +def make_repeater(func, total=10, delay=0.01): + """Make a repeater which calls `func` and sleeps for `delay` seconds + repeatedly until `total` seconds have elapsed. + + """ + + def repeat(num): + start = time.time() + while time.time() - start < total: + func() + time.sleep(delay) + + return repeat + + +def frange(start, stop, step=1e-3): + """Generator for floating point values from `start` to `stop` by `step`.""" + while start < stop: + yield start + start += step + + +def plot(option, filename, cache_times, worker_times): + """Plot concurrent workers and latency.""" + import matplotlib.pyplot as plt + + fig, (workers, latency) = plt.subplots(2, sharex=True) + + fig.suptitle(option) + + changes = [(start, 1) for start, _ in worker_times] + changes.extend((stop, -1) for _, stop in worker_times) + changes.sort() + start = (changes[0][0] - 1e-6, 0) + counts = [start] + + for mark, diff in changes: + # Re-sample between previous and current data point for a nicer-looking + # line plot. 
+ + for step in frange(counts[-1][0], mark): + pair = (step, counts[-1][1]) + counts.append(pair) + + pair = (mark, counts[-1][1] + diff) + counts.append(pair) + + min_x = min(start for start, _ in cache_times) + max_x = max(start for start, _ in cache_times) + for step in frange(counts[-1][0], max_x): + pair = (step, counts[-1][1]) + counts.append(pair) + + x_counts = [x - min_x for x, y in counts] + y_counts = [y for x, y in counts] + + workers.set_title('Concurrency') + workers.set_ylabel('Workers') + workers.set_ylim(0, 11) + workers.plot(x_counts, y_counts) + + latency.set_title('Latency') + latency.set_ylabel('Seconds') + latency.set_ylim(0, 0.5) + latency.set_xlabel('Time') + x_latency = [start - min_x for start, _ in cache_times] + y_latency = [stop - start for start, stop in cache_times] + latency.scatter(x_latency, y_latency) + + plt.savefig(filename) + + +def main(): + shutil.rmtree('/tmp/cache') + cache = dc.Cache('/tmp/cache') + + count = 10 + + cache_times = [] + timer = make_timer(cache_times) + + options = { + ('No Caching', 'no-caching.png'): [ + timer, + ], + ('Traditional Caching', 'traditional-caching.png'): [ + timer, + cache.memoize(expire=1), + ], + ('Synchronized Locking', 'synchronized-locking.png'): [ + timer, + cache.memoize(expire=0), + dc.barrier(cache, dc.Lock), + cache.memoize(expire=1), + ], + ('Early Recomputation', 'early-recomputation.png'): [ + timer, + dc.memoize_stampede(cache, expire=1), + ], + ('Early Recomputation (beta=0.5)', 'early-recomputation-05.png'): [ + timer, + dc.memoize_stampede(cache, expire=1, beta=0.5), + ], + ('Early Recomputation (beta=0.3)', 'early-recomputation-03.png'): [ + timer, + dc.memoize_stampede(cache, expire=1, beta=0.3), + ], + } + + for (option, filename), decorators in options.items(): + print('Simulating:', option) + worker_times = [] + worker = make_worker(worker_times) + for decorator in reversed(decorators): + worker = decorator(worker) + + worker() + repeater = make_repeater(worker) + + with multiprocessing.pool.ThreadPool(count) as pool: + pool.map(repeater, [worker] * count) + + plot(option, filename, cache_times, worker_times) + + cache.clear() + cache_times.clear() + + +if __name__ == '__main__': + main() diff --git a/tests/settings.py b/tests/settings.py index 12b65e2..04aee85 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -25,7 +25,7 @@ # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = True -ALLOWED_HOSTS = [u'testserver'] +ALLOWED_HOSTS = ['testserver'] # Application definition @@ -131,35 +131,4 @@ 'BACKEND': 'diskcache.DjangoCache', 'LOCATION': CACHE_DIR, }, - 'memcached': { - 'BACKEND': 'django.core.cache.backends.memcached.PyLibMCCache', - 'LOCATION': '127.0.0.1:11211', - }, - 'redis': { - 'BACKEND': 'django_redis.cache.RedisCache', - 'LOCATION': 'redis://127.0.0.1:6379/1', - 'OPTIONS': { - 'CLIENT_CLASS': 'django_redis.client.DefaultClient', - } - }, - 'filebased': { - 'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache', - 'LOCATION': '/tmp/django_cache', - 'OPTIONS': { - 'CULL_FREQUENCY': 10, - 'MAX_ENTRIES': 1000, - } - }, - 'locmem': { - 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', - 'LOCATION': 'diskcache', - 'OPTIONS': { - 'CULL_FREQUENCY': 10, - 'MAX_ENTRIES': 1000, - } - }, - 'diskcache': { - 'BACKEND': 'diskcache.DjangoCache', - 'LOCATION': 'tmp', - }, } diff --git a/tests/settings_benchmark.py b/tests/settings_benchmark.py new file mode 100644 index 0000000..c734e68 --- /dev/null +++ b/tests/settings_benchmark.py @@ -0,0 +1,39 @@ +from .settings import * # noqa + +CACHES = { + 'default': { + 'BACKEND': 'diskcache.DjangoCache', + 'LOCATION': CACHE_DIR, # noqa + }, + 'memcached': { + 'BACKEND': 'django.core.cache.backends.memcached.PyLibMCCache', + 'LOCATION': '127.0.0.1:11211', + }, + 'redis': { + 'BACKEND': 'django_redis.cache.RedisCache', + 'LOCATION': 'redis://127.0.0.1:6379/1', + 'OPTIONS': { + 'CLIENT_CLASS': 'django_redis.client.DefaultClient', + }, + }, + 'filebased': { + 'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache', + 'LOCATION': '/tmp/django_cache', + 'OPTIONS': { + 'CULL_FREQUENCY': 10, + 'MAX_ENTRIES': 1000, + }, + }, + 'locmem': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + 'LOCATION': 'diskcache', + 'OPTIONS': { + 'CULL_FREQUENCY': 10, + 'MAX_ENTRIES': 1000, + }, + }, + 'diskcache': { + 'BACKEND': 'diskcache.DjangoCache', + 'LOCATION': 'tmp', + }, +} diff --git a/tests/stress_test_core.py b/tests/stress_test_core.py index a3e7c8b..2b2578b 100644 --- a/tests/stress_test_core.py +++ b/tests/stress_test_core.py @@ -1,28 +1,17 @@ -"Stress test diskcache.core.Cache." 
- -from __future__ import print_function +"""Stress test diskcache.core.Cache.""" import collections as co -from diskcache import Cache, UnknownFileWarning, EmptyDirWarning import multiprocessing as mp import os +import pickle +import queue import random import shutil -import sys import threading import time import warnings -try: - import Queue -except ImportError: - import queue as Queue - -if sys.hexversion < 0x03000000: - range = xrange - import cPickle as pickle -else: - import pickle +from diskcache import Cache, EmptyDirWarning, Timeout, UnknownFileWarning from .utils import display @@ -44,13 +33,15 @@ def make_long(): def make_unicode(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)) + word = ''.join(random.sample('abcdefghijklmnopqrstuvwxyz', word_size)) size = random.randint(1, int(200 / 13)) return word * size def make_bytes(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)).encode('utf-8') + word = ''.join( + random.sample('abcdefghijklmnopqrstuvwxyz', word_size) + ).encode('utf-8') size = random.randint(1, int(200 / 13)) return word * size @@ -60,7 +51,14 @@ def make_float(): def make_object(): return (make_float(),) * random.randint(1, 20) - funcs = [make_int, make_long, make_unicode, make_bytes, make_float, make_object] + funcs = [ + make_int, + make_long, + make_unicode, + make_bytes, + make_float, + make_object, + ] while True: func = random.choice(funcs) @@ -77,14 +75,16 @@ def make_long(): def make_unicode(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)) - size = random.randint(1, int(2 ** 16 / 13)) + word = ''.join(random.sample('abcdefghijklmnopqrstuvwxyz', word_size)) + size = random.randint(1, int(2**16 / 13)) return word * size def make_bytes(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)).encode('utf-8') - size = random.randint(1, int(2 ** 16 / 13)) + word = ''.join( + random.sample('abcdefghijklmnopqrstuvwxyz', word_size) + ).encode('utf-8') + size = random.randint(1, int(2**16 / 13)) return word * size def make_float(): @@ -93,7 +93,14 @@ def make_float(): def make_object(): return [make_float()] * random.randint(1, int(2e3)) - funcs = [make_int, make_long, make_unicode, make_bytes, make_float, make_object] + funcs = [ + make_int, + make_long, + make_unicode, + make_bytes, + make_float, + make_object, + ] while True: func = random.choice(funcs) @@ -124,27 +131,40 @@ def all_ops(): def worker(queue, eviction_policy, processes, threads): - timings = {'get': [], 'set': [], 'delete': []} + timings = co.defaultdict(list) cache = Cache('tmp', eviction_policy=eviction_policy) for index, (action, key, value) in enumerate(iter(queue.get, None)): start = time.time() - if action == 'set': - cache.set(key, value, expire=EXPIRE) - elif action == 'get': - result = cache.get(key) + try: + if action == 'set': + cache.set(key, value, expire=EXPIRE) + elif action == 'get': + result = cache.get(key) + else: + assert action == 'delete' + cache.delete(key) + except Timeout: + miss = True else: - assert action == 'delete' - cache.delete(key) + miss = False stop = time.time() - if action == 'get' and processes == 1 and threads == 1 and EXPIRE is None: + if ( + action == 'get' + and processes == 1 + and threads == 1 + and EXPIRE is None + ): assert result == value if index > WARMUP: - timings[action].append(stop - start) + delta = stop - 
start + timings[action].append(delta) + if miss: + timings[action + '-miss'].append(delta) queue.put(timings) @@ -155,11 +175,13 @@ def dispatch(num, eviction_policy, processes, threads): with open('input-%s.pkl' % num, 'rb') as reader: process_queue = pickle.load(reader) - thread_queues = [Queue.Queue() for _ in range(threads)] + thread_queues = [queue.Queue() for _ in range(threads)] subthreads = [ threading.Thread( - target=worker, args=(thread_queue, eviction_policy, processes, threads) - ) for thread_queue in thread_queues + target=worker, + args=(thread_queue, eviction_policy, processes, threads), + ) + for thread_queue in thread_queues ] for index, triplet in enumerate(process_queue): @@ -169,7 +191,7 @@ def dispatch(num, eviction_policy, processes, threads): for thread_queue in thread_queues: thread_queue.put(None) - start = time.time() + # start = time.time() for thread in subthreads: thread.start() @@ -177,9 +199,9 @@ def dispatch(num, eviction_policy, processes, threads): for thread in subthreads: thread.join() - stop = time.time() + # stop = time.time() - timings = {'get': [], 'set': [], 'delete': [], 'self': (stop - start)} + timings = co.defaultdict(list) for thread_queue in thread_queues: data = thread_queue.get() @@ -204,9 +226,13 @@ def percentile(sequence, percent): return values[pos] -def stress_test(create=True, delete=True, - eviction_policy=u'least-recently-stored', - processes=1, threads=1): +def stress_test( + create=True, + delete=True, + eviction_policy='least-recently-stored', + processes=1, + threads=1, +): shutil.rmtree('tmp', ignore_errors=True) if processes == 1: @@ -243,7 +269,7 @@ def stress_test(create=True, delete=True, warnings.simplefilter('ignore', category=EmptyDirWarning) cache.check() - timings = {'get': [], 'set': [], 'delete': [], 'self': 0.0} + timings = co.defaultdict(list) for num in range(processes): with open('output-%s.pkl' % num, 'rb') as reader: @@ -262,22 +288,22 @@ def stress_test(create=True, delete=True, def stress_test_lru(): - "Stress test least-recently-used eviction policy." - stress_test(eviction_policy=u'least-recently-used') + """Stress test least-recently-used eviction policy.""" + stress_test(eviction_policy='least-recently-used') def stress_test_lfu(): - "Stress test least-frequently-used eviction policy." - stress_test(eviction_policy=u'least-frequently-used') + """Stress test least-frequently-used eviction policy.""" + stress_test(eviction_policy='least-frequently-used') def stress_test_none(): - "Stress test 'none' eviction policy." - stress_test(eviction_policy=u'none') + """Stress test 'none' eviction policy.""" + stress_test(eviction_policy='none') def stress_test_mp(): - "Stress test multiple threads and processes." 
+ """Stress test multiple threads and processes.""" stress_test(processes=4, threads=4) @@ -288,52 +314,85 @@ def stress_test_mp(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - '-n', '--operations', type=float, default=OPERATIONS, + '-n', + '--operations', + type=float, + default=OPERATIONS, help='Number of operations to perform', ) parser.add_argument( - '-g', '--get-average', type=float, default=GET_AVERAGE, + '-g', + '--get-average', + type=float, + default=GET_AVERAGE, help='Expected value of exponential variate used for GET count', ) parser.add_argument( - '-k', '--key-count', type=float, default=KEY_COUNT, - help='Number of unique keys' + '-k', + '--key-count', + type=float, + default=KEY_COUNT, + help='Number of unique keys', ) parser.add_argument( - '-d', '--del-chance', type=float, default=DEL_CHANCE, + '-d', + '--del-chance', + type=float, + default=DEL_CHANCE, help='Likelihood of a key deletion', ) parser.add_argument( - '-w', '--warmup', type=float, default=WARMUP, + '-w', + '--warmup', + type=float, + default=WARMUP, help='Number of warmup operations before timings', ) parser.add_argument( - '-e', '--expire', type=float, default=EXPIRE, + '-e', + '--expire', + type=float, + default=EXPIRE, help='Number of seconds before key expires', ) parser.add_argument( - '-t', '--threads', type=int, default=1, + '-t', + '--threads', + type=int, + default=1, help='Number of threads to start in each process', ) parser.add_argument( - '-p', '--processes', type=int, default=1, + '-p', + '--processes', + type=int, + default=1, help='Number of processes to start', ) parser.add_argument( - '-s', '--seed', type=int, default=0, + '-s', + '--seed', + type=int, + default=0, help='Random seed', ) parser.add_argument( - '--no-create', action='store_false', dest='create', + '--no-create', + action='store_false', + dest='create', help='Do not create operations data', ) parser.add_argument( - '--no-delete', action='store_false', dest='delete', + '--no-delete', + action='store_false', + dest='delete', help='Do not delete operations data', ) parser.add_argument( - '-v', '--eviction-policy', type=unicode, - default=u'least-recently-stored', + '-v', + '--eviction-policy', + type=str, + default='least-recently-stored', ) args = parser.parse_args() diff --git a/tests/stress_test_deque.py b/tests/stress_test_deque.py index cf48812..845b2c2 100644 --- a/tests/stress_test_deque.py +++ b/tests/stress_test_deque.py @@ -1,10 +1,7 @@ """Stress test diskcache.persistent.Deque.""" -from __future__ import print_function - import collections as co import functools as ft -import itertools as it import random import diskcache as dc diff --git a/tests/stress_test_deque_mp.py b/tests/stress_test_deque_mp.py index 4624d71..f3b8a48 100644 --- a/tests/stress_test_deque_mp.py +++ b/tests/stress_test_deque_mp.py @@ -1,11 +1,7 @@ """Stress test diskcache.persistent.Deque.""" -from __future__ import print_function - -import functools as ft import itertools as it import multiprocessing as mp -import os import random import time @@ -110,12 +106,6 @@ def stress(seed, deque): def test(status=False): - if os.environ.get('TRAVIS') == 'true': - return - - if os.environ.get('APPVEYOR') == 'True': - return - random.seed(SEED) deque = dc.Deque(range(SIZE)) processes = [] diff --git a/tests/stress_test_fanout.py b/tests/stress_test_fanout.py index 080b8d8..e78dda5 100644 --- a/tests/stress_test_fanout.py +++ b/tests/stress_test_fanout.py @@ -1,28 +1,16 @@ -"Stress test diskcache.core.Cache." 
+"""Stress test diskcache.core.Cache.""" -from __future__ import print_function - -import collections as co -from diskcache import FanoutCache, UnknownFileWarning, EmptyDirWarning import multiprocessing as mp import os +import pickle +import queue import random import shutil -import sys import threading import time import warnings -try: - import Queue -except ImportError: - import queue as Queue - -if sys.hexversion < 0x03000000: - range = xrange - import cPickle as pickle -else: - import pickle +from diskcache import EmptyDirWarning, FanoutCache, UnknownFileWarning from .utils import display @@ -44,13 +32,15 @@ def make_long(): def make_unicode(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)) + word = ''.join(random.sample('abcdefghijklmnopqrstuvwxyz', word_size)) size = random.randint(1, int(200 / 13)) return word * size def make_bytes(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)).encode('utf-8') + word = ''.join( + random.sample('abcdefghijklmnopqrstuvwxyz', word_size) + ).encode('utf-8') size = random.randint(1, int(200 / 13)) return word * size @@ -60,7 +50,14 @@ def make_float(): def make_object(): return (make_float(),) * random.randint(1, 20) - funcs = [make_int, make_long, make_unicode, make_bytes, make_float, make_object] + funcs = [ + make_int, + make_long, + make_unicode, + make_bytes, + make_float, + make_object, + ] while True: func = random.choice(funcs) @@ -77,14 +74,16 @@ def make_long(): def make_unicode(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)) - size = random.randint(1, int(2 ** 16 / 13)) + word = ''.join(random.sample('abcdefghijklmnopqrstuvwxyz', word_size)) + size = random.randint(1, int(2**16 / 13)) return word * size def make_bytes(): word_size = random.randint(1, 26) - word = u''.join(random.sample(u'abcdefghijklmnopqrstuvwxyz', word_size)).encode('utf-8') - size = random.randint(1, int(2 ** 16 / 13)) + word = ''.join( + random.sample('abcdefghijklmnopqrstuvwxyz', word_size) + ).encode('utf-8') + size = random.randint(1, int(2**16 / 13)) return word * size def make_float(): @@ -93,7 +92,14 @@ def make_float(): def make_object(): return [make_float()] * random.randint(1, int(2e3)) - funcs = [make_int, make_long, make_unicode, make_bytes, make_float, make_object] + funcs = [ + make_int, + make_long, + make_unicode, + make_bytes, + make_float, + make_object, + ] while True: func = random.choice(funcs) @@ -140,7 +146,12 @@ def worker(queue, eviction_policy, processes, threads): stop = time.time() - if action == 'get' and processes == 1 and threads == 1 and EXPIRE is None: + if ( + action == 'get' + and processes == 1 + and threads == 1 + and EXPIRE is None + ): assert result == value if index > WARMUP: @@ -155,11 +166,13 @@ def dispatch(num, eviction_policy, processes, threads): with open('input-%s.pkl' % num, 'rb') as reader: process_queue = pickle.load(reader) - thread_queues = [Queue.Queue() for _ in range(threads)] + thread_queues = [queue.Queue() for _ in range(threads)] subthreads = [ threading.Thread( - target=worker, args=(thread_queue, eviction_policy, processes, threads) - ) for thread_queue in thread_queues + target=worker, + args=(thread_queue, eviction_policy, processes, threads), + ) + for thread_queue in thread_queues ] for index, triplet in enumerate(process_queue): @@ -204,9 +217,13 @@ def percentile(sequence, percent): return values[pos] -def 
stress_test(create=True, delete=True, - eviction_policy=u'least-recently-stored', - processes=1, threads=1): +def stress_test( + create=True, + delete=True, + eviction_policy='least-recently-stored', + processes=1, + threads=1, +): shutil.rmtree('tmp', ignore_errors=True) if processes == 1: @@ -262,22 +279,22 @@ def stress_test(create=True, delete=True, def stress_test_lru(): - "Stress test least-recently-used eviction policy." - stress_test(eviction_policy=u'least-recently-used') + """Stress test least-recently-used eviction policy.""" + stress_test(eviction_policy='least-recently-used') def stress_test_lfu(): - "Stress test least-frequently-used eviction policy." - stress_test(eviction_policy=u'least-frequently-used') + """Stress test least-frequently-used eviction policy.""" + stress_test(eviction_policy='least-frequently-used') def stress_test_none(): - "Stress test 'none' eviction policy." - stress_test(eviction_policy=u'none') + """Stress test 'none' eviction policy.""" + stress_test(eviction_policy='none') def stress_test_mp(): - "Stress test multiple threads and processes." + """Stress test multiple threads and processes.""" stress_test(processes=4, threads=4) @@ -288,52 +305,85 @@ def stress_test_mp(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - '-n', '--operations', type=float, default=OPERATIONS, + '-n', + '--operations', + type=float, + default=OPERATIONS, help='Number of operations to perform', ) parser.add_argument( - '-g', '--get-average', type=float, default=GET_AVERAGE, + '-g', + '--get-average', + type=float, + default=GET_AVERAGE, help='Expected value of exponential variate used for GET count', ) parser.add_argument( - '-k', '--key-count', type=float, default=KEY_COUNT, - help='Number of unique keys' + '-k', + '--key-count', + type=float, + default=KEY_COUNT, + help='Number of unique keys', ) parser.add_argument( - '-d', '--del-chance', type=float, default=DEL_CHANCE, + '-d', + '--del-chance', + type=float, + default=DEL_CHANCE, help='Likelihood of a key deletion', ) parser.add_argument( - '-w', '--warmup', type=float, default=WARMUP, + '-w', + '--warmup', + type=float, + default=WARMUP, help='Number of warmup operations before timings', ) parser.add_argument( - '-e', '--expire', type=float, default=EXPIRE, + '-e', + '--expire', + type=float, + default=EXPIRE, help='Number of seconds before key expires', ) parser.add_argument( - '-t', '--threads', type=int, default=1, + '-t', + '--threads', + type=int, + default=1, help='Number of threads to start in each process', ) parser.add_argument( - '-p', '--processes', type=int, default=1, + '-p', + '--processes', + type=int, + default=1, help='Number of processes to start', ) parser.add_argument( - '-s', '--seed', type=int, default=0, + '-s', + '--seed', + type=int, + default=0, help='Random seed', ) parser.add_argument( - '--no-create', action='store_false', dest='create', + '--no-create', + action='store_false', + dest='create', help='Do not create operations data', ) parser.add_argument( - '--no-delete', action='store_false', dest='delete', + '--no-delete', + action='store_false', + dest='delete', help='Do not delete operations data', ) parser.add_argument( - '-v', '--eviction-policy', type=unicode, - default=u'least-recently-stored', + '-v', + '--eviction-policy', + type=str, + default='least-recently-stored', ) args = parser.parse_args() diff --git a/tests/stress_test_index.py b/tests/stress_test_index.py index 2846d9c..e7ba3f6 100644 --- a/tests/stress_test_index.py +++ 
b/tests/stress_test_index.py @@ -1,7 +1,5 @@ """Stress test diskcache.persistent.Index.""" -from __future__ import print_function - import collections as co import itertools as it import random diff --git a/tests/stress_test_index_mp.py b/tests/stress_test_index_mp.py index b3ed813..06ed102 100644 --- a/tests/stress_test_index_mp.py +++ b/tests/stress_test_index_mp.py @@ -1,10 +1,7 @@ """Stress test diskcache.persistent.Index.""" -from __future__ import print_function - import itertools as it import multiprocessing as mp -import os import random import time @@ -96,12 +93,6 @@ def stress(seed, index): def test(status=False): - if os.environ.get('TRAVIS') == 'true': - return - - if os.environ.get('APPVEYOR') == 'True': - return - random.seed(SEED) index = dc.Index(enumerate(range(KEYS))) processes = [] diff --git a/tests/talk/benchmark.py b/tests/talk/benchmark.py deleted file mode 100644 index e115162..0000000 --- a/tests/talk/benchmark.py +++ /dev/null @@ -1,35 +0,0 @@ -import random, requests, signal, time, threading - -signal.signal(signal.SIGINT, lambda signum, frame: exit()) - - -count = 0 - -def monitor(): - global count - while True: - time.sleep(1) - print(f"{'*' * (count // 8)}") - count = 0 - -thread = threading.Thread(target=monitor) -thread.daemon = True -thread.start() - - -# Histogram of expovariate values: -# value | count -# ----- | ----- -# 64 | ************************************************************* -# 127 | ******************************** -# 191 | *************** -# 254 | ****** -# 318 | *** -# 382 | ** -# 445 | * -# 509 | - -while True: - value = int(random.expovariate(1) * 100) - response = requests.get(f'http://127.0.0.1:8000/echo/{value}') - count += 1 diff --git a/tests/talk/crawler.py b/tests/talk/crawler.py deleted file mode 100644 index fc36154..0000000 --- a/tests/talk/crawler.py +++ /dev/null @@ -1,52 +0,0 @@ -import bs4, requests, signal, urllib.parse - -signal.signal(signal.SIGINT, lambda signum, frame: exit()) - -root='http://127.0.0.1:8000/' - - -def get(url): - "Get url and return response text." - print(url) - response = requests.get(url) - return response.text - - -def parse(url, text): - "Parse url with given text and yield links." - soup = bs4.BeautifulSoup(text, 'lxml') - - for anchor in soup.find_all('a', href=True): - full_url = urllib.parse.urljoin(url, anchor['href']) - href, _ = urllib.parse.urldefrag(full_url) - - if href.startswith(root): - yield href - - -from collections import deque - -def crawl(): - "Crawl root url." - urls = deque([root]) - results = dict() - - while True: - try: - url = urls.popleft() - except IndexError: - break - - if url in results: - continue - - text = get(url) - - for link in parse(url, text): - urls.append(link) - - results[url] = text - - -if __name__ == '__main__': - crawl() diff --git a/tests/talk/manage.py b/tests/talk/manage.py deleted file mode 100755 index 0c84391..0000000 --- a/tests/talk/manage.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "talk.settings") - try: - from django.core.management import execute_from_command_line - except ImportError: - # The above import may fail for some other reason. Ensure that the - # issue is really that Django is missing to avoid masking other - # exceptions on Python 2. - try: - import django - except ImportError: - raise ImportError( - "Couldn't import Django. Are you sure it's installed and " - "available on your PYTHONPATH environment variable? 
Did you " - "forget to activate a virtual environment?" - ) - raise - execute_from_command_line(sys.argv) diff --git a/tests/talk/talk/__init__.py b/tests/talk/talk/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/talk/talk/settings.py b/tests/talk/talk/settings.py deleted file mode 100644 index aa4165f..0000000 --- a/tests/talk/talk/settings.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Django settings for talk project. - -Generated by 'django-admin startproject' using Django 1.10.6. - -For more information on this file, see -https://docs.djangoproject.com/en/1.10/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.10/ref/settings/ -""" - -import os - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = '_lzt+2b46g)@x%set-4u7j-vjw-_%sq4xdco990z(l4o2$^_)*' - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = False - -ALLOWED_HOSTS = ['127.0.0.1'] - - -# Application definition - -INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', -] - -MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -] - -ROOT_URLCONF = 'talk.urls' - -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - }, - }, -] - -WSGI_APPLICATION = 'talk.wsgi.application' - - -# Database -# https://docs.djangoproject.com/en/1.10/ref/settings/#databases - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), - } -} - - -# Password validation -# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', - }, -] - - -# Internationalization -# https://docs.djangoproject.com/en/1.10/topics/i18n/ - -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'UTC' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.10/howto/static-files/ - -STATIC_URL = '/static/' - - -CACHES = { - 'filebased': { - 'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache', - 'LOCATION': '/tmp/filebased', - 'OPTIONS': { - 
'MAX_ENTRIES': 1000, - } - }, - 'memcached': { - 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', - 'LOCATION': [ - '127.0.0.1:11211', - ], - }, - 'diskcache': { - 'BACKEND': 'diskcache.DjangoCache', - 'LOCATION': '/tmp/diskcache', - } -} diff --git a/tests/talk/talk/urls.py b/tests/talk/talk/urls.py deleted file mode 100644 index ac79390..0000000 --- a/tests/talk/talk/urls.py +++ /dev/null @@ -1,25 +0,0 @@ -"""talk URL Configuration - -The `urlpatterns` list routes URLs to views. For more information please see: - https://docs.djangoproject.com/en/1.10/topics/http/urls/ -Examples: -Function views - 1. Add an import: from my_app import views - 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') -Class-based views - 1. Add an import: from other_app.views import Home - 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') -Including another URLconf - 1. Import the include() function: from django.conf.urls import url, include - 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) -""" -from django.conf.urls import url -from django.contrib import admin - -from . import views - -urlpatterns = [ - url(r'^echo/(?P.*)$', views.echo), - url(r'^$', views.index), - url(r'^crawl/(?P.*)$', views.crawl), -] diff --git a/tests/talk/talk/views.py b/tests/talk/talk/views.py deleted file mode 100644 index 675b31e..0000000 --- a/tests/talk/talk/views.py +++ /dev/null @@ -1,25 +0,0 @@ -import random, time - -from django.http import HttpResponse -from django.views.decorators.cache import cache_page - -# @cache_page(3600, cache='filebased') -# @cache_page(3600, cache='memcached') -# @cache_page(3600, cache='diskcache') -def echo(request, value): - time.sleep(0.1) - return HttpResponse(value, content_type='text/plain') - - -def index(request): - return HttpResponse('0') - - -def crawl(request, value): - time.sleep(random.random()) - value = int(value) - random.seed(value) - nums = random.sample(range(100), 5) - link = '{0}
' - links = ''.join(link.format(num) for num in nums) - return HttpResponse('{}'.format(links)) diff --git a/tests/talk/talk/wsgi.py b/tests/talk/talk/wsgi.py deleted file mode 100644 index a636d03..0000000 --- a/tests/talk/talk/wsgi.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -WSGI config for talk project. - -It exposes the WSGI callable as a module-level variable named ``application``. - -For more information on this file, see -https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/ -""" - -import os - -from django.core.wsgi import get_wsgi_application - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "talk.settings") - -application = get_wsgi_application() diff --git a/tests/test_core.py b/tests/test_core.py index 38c15d5..788afef 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,53 +1,35 @@ -"Test diskcache.core.Cache." +"""Test diskcache.core.Cache.""" -from __future__ import print_function - -import collections as co import errno -import functools as ft import hashlib import io -import json -import mock -import nose.tools as nt import os import os.path as op -import random +import pathlib +import pickle import shutil import sqlite3 import subprocess as sp -import sys +import tempfile import threading import time -import unittest import warnings -import zlib +from unittest import mock -try: - import cPickle as pickle -except: - import pickle +import pytest -import diskcache import diskcache as dc -warnings.simplefilter('error') -warnings.simplefilter('ignore', category=dc.EmptyDirWarning) +pytestmark = pytest.mark.filterwarnings('ignore', category=dc.EmptyDirWarning) -if sys.hexversion < 0x03000000: - range = xrange -def setup_cache(func): - @ft.wraps(func) - def wrapper(): - shutil.rmtree('tmp', ignore_errors=True) - with dc.Cache('tmp') as cache: - func(cache) - shutil.rmtree('tmp', ignore_errors=True) - return wrapper +@pytest.fixture +def cache(): + with dc.Cache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) -@setup_cache def test_init(cache): for key, value in dc.DEFAULT_SETTINGS.items(): assert getattr(cache, key) == value @@ -56,19 +38,25 @@ def test_init(cache): cache.close() +def test_init_path(cache): + path = pathlib.Path(cache.directory) + other = dc.Cache(path) + other.close() + assert cache.directory == other.directory + + def test_init_disk(): - with dc.Cache('tmp', disk_pickle_protocol=1, disk_min_file_size=2 ** 20) as cache: + with dc.Cache(disk_pickle_protocol=1, disk_min_file_size=2**20) as cache: key = (None, 0, 'abc') cache[key] = 0 cache.check() - assert cache.directory == 'tmp' - assert cache.disk_min_file_size == 2 ** 20 + assert cache.disk_min_file_size == 2**20 assert cache.disk_pickle_protocol == 1 - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache.directory, ignore_errors=True) def test_disk_reset(): - with dc.Cache('tmp', disk_min_file_size=0, disk_pickle_protocol=0) as cache: + with dc.Cache(disk_min_file_size=0, disk_pickle_protocol=0) as cache: value = (None, 0, 'abc') cache[0] = value @@ -79,55 +67,28 @@ def test_disk_reset(): assert cache._disk.min_file_size == 0 assert cache._disk.pickle_protocol == 0 - cache.reset('disk_min_file_size', 2 ** 10) + cache.reset('disk_min_file_size', 2**10) cache.reset('disk_pickle_protocol', 2) cache[1] = value cache.check() - assert cache.disk_min_file_size == 2 ** 10 + assert cache.disk_min_file_size == 2**10 assert cache.disk_pickle_protocol == 2 - assert cache._disk.min_file_size == 2 ** 10 + assert cache._disk.min_file_size == 2**10 assert 
cache._disk.pickle_protocol == 2 - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache.directory, ignore_errors=True) -@nt.raises(ValueError) def test_disk_valueerror(): - with dc.Cache('tmp', disk=dc.Disk('tmp')) as cache: - pass - - -class JSONDisk(diskcache.Disk): - def __init__(self, directory, compress_level=1, **kwargs): - self.compress_level = compress_level - super(JSONDisk, self).__init__(directory, **kwargs) - - def put(self, key): - json_bytes = json.dumps(key).encode('utf-8') - data = zlib.compress(json_bytes, self.compress_level) - return super(JSONDisk, self).put(data) - - def get(self, key, raw): - data = super(JSONDisk, self).get(key, raw) - return json.loads(zlib.decompress(data).decode('utf-8')) - - def store(self, value, read, key=dc.UNKNOWN): - if not read: - json_bytes = json.dumps(value).encode('utf-8') - value = zlib.compress(json_bytes, self.compress_level) - return super(JSONDisk, self).store(value, read, key=key) - - def fetch(self, mode, filename, value, read): - data = super(JSONDisk, self).fetch(mode, filename, value, read) - if not read: - data = json.loads(zlib.decompress(data).decode('utf-8')) - return data + with pytest.raises(ValueError): + with dc.Cache(disk=dc.Disk('test')): + pass def test_custom_disk(): - with dc.Cache('tmp', disk=JSONDisk, disk_compress_level=6) as cache: + with dc.Cache(disk=dc.JSONDisk, disk_compress_level=6) as cache: values = [None, True, 0, 1.23, {}, [None] * 10000] for value in values: @@ -136,10 +97,15 @@ def test_custom_disk(): for value in values: assert cache[value] == value - shutil.rmtree('tmp', ignore_errors=True) + for key, value in zip(cache, values): + assert key == value + + test_memoize_iter(cache) + shutil.rmtree(cache.directory, ignore_errors=True) -class SHA256FilenameDisk(diskcache.Disk): + +class SHA256FilenameDisk(dc.Disk): def filename(self, key=dc.UNKNOWN, value=dc.UNKNOWN): filename = hashlib.sha256(key).hexdigest()[:32] full_path = op.join(self._directory, filename) @@ -147,7 +113,7 @@ def filename(self, key=dc.UNKNOWN, value=dc.UNKNOWN): def test_custom_filename_disk(): - with dc.Cache('tmp', disk=SHA256FilenameDisk) as cache: + with dc.Cache(disk=SHA256FilenameDisk) as cache: for count in range(100, 200): key = str(count).encode('ascii') cache[key] = str(count) * int(1e5) @@ -155,49 +121,29 @@ def test_custom_filename_disk(): for count in range(100, 200): key = str(count).encode('ascii') filename = hashlib.sha256(key).hexdigest()[:32] - full_path = op.join('tmp', filename) + full_path = op.join(cache.directory, filename) with open(full_path) as reader: content = reader.read() assert content == str(count) * int(1e5) - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache.directory, ignore_errors=True) -@nt.raises(EnvironmentError) def test_init_makedirs(): - shutil.rmtree('tmp', ignore_errors=True) + cache_dir = tempfile.mkdtemp() + shutil.rmtree(cache_dir) makedirs = mock.Mock(side_effect=OSError(errno.EACCES)) - try: - with mock.patch('os.makedirs', makedirs): - cache = dc.Cache('tmp') - except EnvironmentError: - shutil.rmtree('tmp') - raise - - -@setup_cache -def test_pragma(cache): - local = mock.Mock() - con = mock.Mock() - execute = mock.Mock() - cursor = mock.Mock() - fetchall = mock.Mock() - - local.con = con - con.execute = execute - execute.return_value = cursor - cursor.fetchall = fetchall - fetchall.side_effect = [sqlite3.OperationalError, None] - - size = 2 ** 28 + with pytest.raises(EnvironmentError): + try: + with mock.patch('os.makedirs', makedirs): + 
dc.Cache(cache_dir) + except EnvironmentError: + shutil.rmtree(cache_dir, ignore_errors=True) + raise - with mock.patch.object(cache, '_local', local): - assert cache.reset('sqlite_mmap_size', size) == size -@setup_cache -@nt.raises(sqlite3.OperationalError) def test_pragma_error(cache): local = mock.Mock() con = mock.Mock() @@ -205,24 +151,26 @@ def test_pragma_error(cache): cursor = mock.Mock() fetchall = mock.Mock() + local.pid = os.getpid() local.con = con con.execute = execute execute.return_value = cursor cursor.fetchall = fetchall fetchall.side_effect = [sqlite3.OperationalError] * 60000 - size = 2 ** 28 + size = 2**28 with mock.patch('time.sleep', lambda num: 0): with mock.patch.object(cache, '_local', local): - cache.reset('sqlite_mmap_size', size) + with pytest.raises(sqlite3.OperationalError): + cache.reset('sqlite_mmap_size', size) -@setup_cache def test_close_error(cache): class LocalTest(object): def __init__(self): self._calls = 0 + def __getattr__(self, name): if self._calls: raise AttributeError @@ -234,19 +182,18 @@ def __getattr__(self, name): cache.close() -@setup_cache def test_getsetdel(cache): values = [ (None, False), - ((None,) * 2 ** 20, False), + ((None,) * 2**20, False), (1234, False), - (2 ** 512, False), + (2**512, False), (56.78, False), - (u'hello', False), - (u'hello' * 2 ** 20, False), + ('hello', False), + ('hello' * 2**20, False), (b'world', False), - (b'world' * 2 ** 20, False), - (io.BytesIO(b'world' * 2 ** 20), True), + (b'world' * 2**20, False), + (io.BytesIO(b'world' * 2**20), True), ] for key, (value, file_like) in enumerate(values): @@ -281,52 +228,36 @@ def test_getsetdel(cache): cache.check() -@nt.raises(KeyError) -@setup_cache def test_get_keyerror1(cache): - cache[0] + with pytest.raises(KeyError): + cache[0] -@nt.raises(IOError, KeyError) -@setup_cache def test_get_keyerror4(cache): func = mock.Mock(side_effect=IOError(errno.ENOENT, '')) cache.reset('statistics', True) - cache[0] = b'abcd' * 2 ** 20 + cache[0] = b'abcd' * 2**20 with mock.patch('diskcache.core.open', func): - cache[0] + with pytest.raises((IOError, KeyError, OSError)): + cache[0] -@nt.raises(IOError) -@setup_cache -def test_get_keyerror5(cache): - func = mock.Mock(side_effect=IOError(errno.EACCES, '')) - - cache[0] = b'abcd' * 2 ** 20 - - with mock.patch('diskcache.core.open', func): - cache[0] - - -@setup_cache def test_read(cache): - cache.set(0, b'abcd' * 2 ** 20) + cache.set(0, b'abcd' * 2**20) with cache.read(0) as reader: assert reader is not None -@nt.raises(KeyError) -@setup_cache def test_read_keyerror(cache): - with cache.read(0) as reader: - pass + with pytest.raises(KeyError): + with cache.read(0): + pass -@setup_cache def test_set_twice(cache): - large_value = b'abcd' * 2 ** 20 + large_value = b'abcd' * 2**20 cache[0] = 0 cache[0] = 1 @@ -347,51 +278,48 @@ def test_set_twice(cache): cache.check() -@setup_cache -@nt.raises(dc.Timeout) def test_set_timeout(cache): local = mock.Mock() con = mock.Mock() execute = mock.Mock() + local.pid = os.getpid() local.con = con con.execute = execute execute.side_effect = sqlite3.OperationalError - try: - with mock.patch.object(cache, '_local', local): - cache.set('a', 'b' * 2 ** 20) - finally: - cache.check() + with pytest.raises(dc.Timeout): + try: + with mock.patch.object(cache, '_local', local): + cache.set('a', 'b' * 2**20) + finally: + cache.check() -@setup_cache def test_raw(cache): assert cache.set(0, io.BytesIO(b'abcd'), read=True) assert cache[0] == b'abcd' -@setup_cache def test_get(cache): assert cache.get(0) is None 
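
The hunks above replace the nose-era `@setup_cache` decorator and `@nt.raises(...)` assertions with a pytest fixture and `pytest.raises` context managers. A minimal sketch of that pattern, using only the `diskcache` and `pytest` calls visible in this diff (the test body is an illustrative stand-in, not one of the project's tests):

```python
import shutil

import pytest

import diskcache as dc


@pytest.fixture
def cache():
    # Cache() without a directory creates and manages its own temporary directory.
    with dc.Cache() as cache:
        yield cache
    # Clean up the temporary directory once the test has finished.
    shutil.rmtree(cache.directory, ignore_errors=True)


def test_missing_key_raises(cache):
    # pytest.raises replaces the old @nt.raises(KeyError) decorator.
    with pytest.raises(KeyError):
        cache[0]
```
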
assert cache.get(1, 'dne') == 'dne' assert cache.get(2, {}) == {} assert cache.get(0, expire_time=True, tag=True) == (None, None, None) - assert cache.set(0, 0, expire=None, tag=u'number') + assert cache.set(0, 0, expire=None, tag='number') assert cache.get(0, expire_time=True) == (0, None) - assert cache.get(0, tag=True) == (0, u'number') - assert cache.get(0, expire_time=True, tag=True) == (0, None, u'number') + assert cache.get(0, tag=True) == (0, 'number') + assert cache.get(0, expire_time=True, tag=True) == (0, None, 'number') + -@setup_cache def test_get_expired_fast_path(cache): assert cache.set(0, 0, expire=0.001) time.sleep(0.01) assert cache.get(0) is None -@setup_cache def test_get_ioerror_fast_path(cache): assert cache.set(0, 0) @@ -410,7 +338,6 @@ def test_get_ioerror_fast_path(cache): assert cache.get(0) is None -@setup_cache def test_get_expired_slow_path(cache): cache.stats(enable=True) cache.reset('eviction_policy', 'least-recently-used') @@ -419,28 +346,6 @@ def test_get_expired_slow_path(cache): assert cache.get(0) is None -@setup_cache -@nt.raises(IOError) -def test_get_ioerror_slow_path(cache): - cache.reset('eviction_policy', 'least-recently-used') - cache.set(0, 0) - - disk = mock.Mock() - put = mock.Mock() - fetch = mock.Mock() - - disk.put = put - put.side_effect = [(0, True)] - disk.fetch = fetch - io_error = IOError() - io_error.errno = errno.EACCES - fetch.side_effect = io_error - - with mock.patch.object(cache, '_disk', disk): - cache.get(0) - - -@setup_cache def test_pop(cache): assert cache.incr('alpha') == 1 assert cache.pop('alpha') == 1 @@ -450,7 +355,7 @@ def test_pop(cache): assert cache.set('alpha', 123, expire=1, tag='blue') assert cache.pop('alpha', tag=True) == (123, 'blue') - assert cache.set('beta', 456, expire=0, tag='green') + assert cache.set('beta', 456, expire=1e-9, tag='green') time.sleep(0.01) assert cache.pop('beta', 'dne') == 'dne' @@ -462,11 +367,10 @@ def test_pop(cache): assert cache.set('delta', 210) assert cache.pop('delta', expire_time=True) == (210, None) - assert cache.set('epsilon', '0' * 2 ** 20) - assert cache.pop('epsilon') == '0' * 2 ** 20 + assert cache.set('epsilon', '0' * 2**20) + assert cache.pop('epsilon') == '0' * 2**20 -@setup_cache def test_pop_ioerror(cache): assert cache.set(0, 0) @@ -485,27 +389,6 @@ def test_pop_ioerror(cache): assert cache.pop(0) is None -@setup_cache -@nt.raises(IOError) -def test_pop_ioerror_eacces(cache): - assert cache.set(0, 0) - - disk = mock.Mock() - put = mock.Mock() - fetch = mock.Mock() - - disk.put = put - put.side_effect = [(0, True)] - disk.fetch = fetch - io_error = IOError() - io_error.errno = errno.EACCES - fetch.side_effect = io_error - - with mock.patch.object(cache, '_disk', disk): - cache.pop(0) - - -@setup_cache def test_delete(cache): cache[0] = 0 assert cache.delete(0) @@ -514,21 +397,18 @@ def test_delete(cache): assert len(cache.check()) == 0 -@nt.raises(KeyError) -@setup_cache def test_del(cache): - del cache[0] + with pytest.raises(KeyError): + del cache[0] -@nt.raises(KeyError) -@setup_cache def test_del_expired(cache): cache.set(0, 0, expire=0.001) time.sleep(0.01) - del cache[0] + with pytest.raises(KeyError): + del cache[0] -@setup_cache def test_stats(cache): cache[0] = 0 @@ -553,13 +433,12 @@ def test_stats(cache): assert len(cache.check()) == 0 -@setup_cache def test_path(cache): - cache[0] = u'abc' - large_value = b'abc' * 2 ** 20 + cache[0] = 'abc' + large_value = b'abc' * 2**20 cache[1] = large_value - assert cache.get(0, read=True) == u'abc' + assert 
cache.get(0, read=True) == 'abc' with cache.get(1, read=True) as reader: assert reader.name is not None @@ -573,12 +452,11 @@ def test_path(cache): assert len(cache.check()) == 0 -@setup_cache def test_expire_rows(cache): cache.reset('cull_limit', 0) for value in range(10): - assert cache.set(value, value, expire=0) + assert cache.set(value, value, expire=1e-9) for value in range(10, 15): assert cache.set(value, value) @@ -594,9 +472,8 @@ def test_expire_rows(cache): assert len(cache.check()) == 0 -@setup_cache def test_least_recently_stored(cache): - cache.reset('eviction_policy', u'least-recently-stored') + cache.reset('eviction_policy', 'least-recently-stored') cache.reset('size_limit', int(10.1e6)) cache.reset('cull_limit', 2) @@ -630,9 +507,8 @@ def test_least_recently_stored(cache): assert len(cache.check()) == 0 -@setup_cache def test_least_recently_used(cache): - cache.reset('eviction_policy', u'least-recently-used') + cache.reset('eviction_policy', 'least-recently-used') cache.reset('size_limit', int(10.1e6)) cache.reset('cull_limit', 5) @@ -661,9 +537,8 @@ def test_least_recently_used(cache): assert len(cache.check()) == 0 -@setup_cache def test_least_frequently_used(cache): - cache.reset('eviction_policy', u'least-frequently-used') + cache.reset('eviction_policy', 'least-frequently-used') cache.reset('size_limit', int(10.1e6)) cache.reset('cull_limit', 5) @@ -690,35 +565,9 @@ def test_least_frequently_used(cache): assert len(cache.check()) == 0 -@nt.raises(OSError) -@setup_cache -def test_filename_error(cache): - func = mock.Mock(side_effect=OSError(errno.EACCES)) - - with mock.patch('os.makedirs', func): - cache._disk.filename() - - -@setup_cache -def test_remove_error(cache): - func = mock.Mock(side_effect=OSError(errno.EACCES)) - - try: - with mock.patch('os.remove', func): - cache._disk.remove('ab/cd/efg.val') - except OSError: - pass - else: - if os.name == 'nt': - pass # File delete errors ignored on Windows. - else: - raise Exception('test_remove_error failed') - - -@setup_cache def test_check(cache): - blob = b'a' * 2 ** 20 - keys = (0, 1, 1234, 56.78, u'hello', b'world', None) + blob = b'a' * 2**20 + keys = (0, 1, 1234, 56.78, 'hello', b'world', None) for key in keys: cache[key] = blob @@ -742,21 +591,20 @@ def test_check(cache): cache.check() cache.check(fix=True) - assert len(cache.check()) == 0 # Should display no warnings. + assert len(cache.check()) == 0 # Should display no warnings. -@setup_cache def test_integrity_check(cache): for value in range(1000): cache[value] = value cache.close() - with io.open('tmp/cache.db', 'r+b') as writer: + with io.open(op.join(cache.directory, 'cache.db'), 'r+b') as writer: writer.seek(52) - writer.write(b'\x00\x01') # Should be 0, change it. + writer.write(b'\x00\x01') # Should be 0, change it. - cache = dc.Cache('tmp') + cache = dc.Cache(cache.directory) with warnings.catch_warnings(): warnings.filterwarnings('ignore') @@ -766,19 +614,18 @@ def test_integrity_check(cache): assert len(cache.check()) == 0 -@setup_cache def test_expire(cache): cache.reset('cull_limit', 0) # Disable expiring keys on `set`. 
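
The eviction-policy tests above configure the cache at runtime through `Cache.reset`, and read the same settings back as attributes. A small sketch of that configuration pattern under assumed example values; `cull()` is the explicit eviction call the size-limit tests rely on:

```python
import diskcache as dc

with dc.Cache() as cache:
    # Settings are read as attributes and changed at runtime via reset().
    cache.reset('eviction_policy', 'least-recently-used')
    cache.reset('size_limit', int(10.1e6))  # bytes
    cache.reset('cull_limit', 0)            # disable automatic culling on set()

    for num in range(200):
        cache[num] = b'\x00' * 100_000

    # cull() evicts items under the configured policy until the volume
    # fits back under size_limit.
    cache.cull()
    assert cache.volume() <= cache.size_limit
```
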
now = time.time() time_time = mock.Mock(return_value=now) with mock.patch('time.time', time_time): - for value in range(100): + for value in range(1, 101): assert cache.set(value, value, expire=value) assert len(cache) == 100 - time_time = mock.Mock(return_value=now + 10) + time_time = mock.Mock(return_value=now + 11) cache.reset('cull_limit', 10) with mock.patch('time.time', time_time): assert cache.expire() == 10 @@ -788,12 +635,11 @@ def test_expire(cache): def test_tag_index(): - with dc.Cache('tmp', tag_index=True) as cache: + with dc.Cache(tag_index=True) as cache: assert cache.tag_index == 1 - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache.directory, ignore_errors=True) -@setup_cache def test_evict(cache): colors = ('red', 'blue', 'yellow') @@ -806,7 +652,6 @@ def test_evict(cache): assert len(cache.check()) == 0 -@setup_cache def test_clear(cache): for value in range(100): cache[value] = value @@ -816,103 +661,48 @@ def test_clear(cache): assert len(cache.check()) == 0 -@setup_cache -@nt.raises(dc.Timeout) def test_clear_timeout(cache): transact = mock.Mock() transact.side_effect = dc.Timeout with mock.patch.object(cache, '_transact', transact): - cache.clear() + with pytest.raises(dc.Timeout): + cache.clear() -@setup_cache def test_tag(cache): - assert cache.set(0, None, tag=u'zero') + assert cache.set(0, None, tag='zero') assert cache.set(1, None, tag=1234) assert cache.set(2, None, tag=5.67) assert cache.set(3, None, tag=b'three') - assert cache.get(0, tag=True) == (None, u'zero') + assert cache.get(0, tag=True) == (None, 'zero') assert cache.get(1, tag=True) == (None, 1234) assert cache.get(2, tag=True) == (None, 5.67) assert cache.get(3, tag=True) == (None, b'three') -@setup_cache -def test_multiple_threads(cache): - values = list(range(100)) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - cache = dc.Cache('tmp') - - def worker(): - sets = list(values) - random.shuffle(sets) - - with dc.Cache('tmp') as thread_cache: - for value in sets: - thread_cache[value] = value - - threads = [threading.Thread(target=worker) for _ in range(10)] - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() - - for value in values: - assert cache[value] == value - - assert len(cache.check()) == 0 - - -@setup_cache -def test_thread_safe(cache): - values = list(range(100)) - - def worker(): - with cache: - sets = list(values) - random.shuffle(sets) - for value in sets: - cache[value] = value - - threads = [threading.Thread(target=worker) for _ in range(10)] - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() - - for value in values: - assert cache[value] == value - - assert len(cache.check()) == 0 - - -@setup_cache def test_with(cache): - with dc.Cache('tmp') as tmp: - tmp[u'a'] = 0 - tmp[u'b'] = 1 + with dc.Cache(cache.directory) as tmp: + tmp['a'] = 0 + tmp['b'] = 1 - assert cache[u'a'] == 0 - assert cache[u'b'] == 1 + assert cache['a'] == 0 + assert cache['b'] == 1 -@setup_cache def test_contains(cache): assert 0 not in cache cache[0] = 0 assert 0 in cache -@setup_cache +def test_touch(cache): + assert cache.set(0, None, expire=60) + assert cache.touch(0, expire=None) + assert cache.touch(0, expire=0) + assert not cache.touch(0) + + def test_add(cache): assert cache.add(1, 1) assert cache.get(1) == 1 @@ -925,9 +715,8 @@ def test_add(cache): cache.check() -@setup_cache def test_add_large_value(cache): - value = b'abcd' * 2 ** 20 + value = b'abcd' * 2**20 assert cache.add(b'test-key', value) assert 
cache.get(b'test-key') == value assert not cache.add(b'test-key', value * 2) @@ -935,55 +724,24 @@ def test_add_large_value(cache): cache.check() -def stress_add(cache, limit, results): - total = 0 - for num in range(limit): - if cache.add(num, num): - total += 1 - # Stop one thread from running ahead of others. - time.sleep(0.001) - results.append(total) - - -@setup_cache -def test_add_concurrent(cache): - results = co.deque() - limit = 1000 - - threads = [ - threading.Thread(target=stress_add, args=(cache, limit, results)) - for _ in range(16) - ] - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() - - assert sum(results) == limit - cache.check() - - -@setup_cache -@nt.raises(dc.Timeout) def test_add_timeout(cache): local = mock.Mock() con = mock.Mock() execute = mock.Mock() + local.pid = os.getpid() local.con = con con.execute = execute execute.side_effect = sqlite3.OperationalError - try: - with mock.patch.object(cache, '_local', local): - cache.add(0, 0) - finally: - cache.check() + with pytest.raises(dc.Timeout): + try: + with mock.patch.object(cache, '_local', local): + cache.add(0, 0) + finally: + cache.check() -@setup_cache def test_incr(cache): assert cache.incr('key', default=5) == 6 assert cache.incr('key', 2) == 8 @@ -995,22 +753,19 @@ def test_incr(cache): assert cache.incr('key') == 1 -@setup_cache -@nt.raises(KeyError) def test_incr_insert_keyerror(cache): - cache.incr('key', default=None) + with pytest.raises(KeyError): + cache.incr('key', default=None) -@setup_cache -@nt.raises(KeyError) def test_incr_update_keyerror(cache): assert cache.set('key', 100, expire=0.100) assert cache.get('key') == 100 time.sleep(0.120) - cache.incr('key', default=None) + with pytest.raises(KeyError): + cache.incr('key', default=None) -@setup_cache def test_decr(cache): assert cache.decr('key', default=5) == 4 assert cache.decr('key', 2) == 2 @@ -1022,8 +777,6 @@ def test_decr(cache): assert cache.decr('key') == -1 -@setup_cache -@nt.raises(StopIteration) def test_iter(cache): sequence = list('abcdef') + [('g',)] @@ -1036,25 +789,23 @@ def test_iter(cache): cache['h'] = 7 - next(iterator) + with pytest.raises(StopIteration): + next(iterator) -@setup_cache def test_iter_expire(cache): cache.reset('cull_limit', 0) for num in range(100): - cache.set(num, num, expire=0) + cache.set(num, num, expire=1e-9) assert len(cache) == 100 assert list(cache) == list(range(100)) -@setup_cache -@nt.raises(StopIteration) def test_iter_error(cache): - next(iter(cache)) + with pytest.raises(StopIteration): + next(iter(cache)) -@setup_cache def test_reversed(cache): sequence = 'abcdef' @@ -1074,13 +825,11 @@ def test_reversed(cache): assert False, 'StopIteration expected' -@setup_cache -@nt.raises(StopIteration) def test_reversed_error(cache): - next(reversed(cache)) + with pytest.raises(StopIteration): + next(reversed(cache)) -@setup_cache def test_push_pull(cache): for value in range(10): cache.push(value) @@ -1092,27 +841,32 @@ def test_push_pull(cache): assert len(cache) == 0 -@setup_cache def test_push_pull_prefix(cache): for value in range(10): cache.push(value, prefix='key') for value in range(10): + key, peek_value = cache.peek(prefix='key') key, pull_value = cache.pull(prefix='key') assert key.startswith('key') + assert peek_value == value assert pull_value == value assert len(cache) == 0 assert len(cache.check()) == 0 -@setup_cache def test_push_pull_extras(cache): cache.push('test') assert cache.pull() == (500000000000000, 'test') assert len(cache) == 0 
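
The queue-style tests above exercise `push`, `pull`, and the newly covered `peek`, which reads the next queued item without removing it; a `prefix` groups items under keys that start with that prefix. A brief usage sketch based on the behavior these tests assert (names and values are illustrative):

```python
import diskcache as dc

with dc.Cache() as cache:
    for value in ('a', 'b', 'c'):
        cache.push(value, prefix='task')    # keys are strings starting with 'task'

    key, value = cache.peek(prefix='task')  # read the next item without removing it
    assert value == 'a'
    key, value = cache.pull(prefix='task')  # remove and return the next item
    assert value == 'a'
    assert len(cache) == 2
```
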
cache.push('test', expire=10) + (key, value), expire_time = cache.peek(expire_time=True) + assert key == 500000000000000 + assert value == 'test' + assert expire_time > time.time() + assert len(cache) == 1 (key, value), expire_time = cache.pull(expire_time=True) assert key == 500000000000000 assert value == 'test' @@ -1120,6 +874,11 @@ def test_push_pull_extras(cache): assert len(cache) == 0 cache.push('test', tag='foo') + (key, value), tag = cache.peek(tag=True) + assert key == 500000000000000 + assert value == 'test' + assert tag == 'foo' + assert len(cache) == 1 (key, value), tag = cache.pull(tag=True) assert key == 500000000000000 assert value == 'test' @@ -1127,6 +886,12 @@ def test_push_pull_extras(cache): assert len(cache) == 0 cache.push('test') + (key, value), expire_time, tag = cache.peek(expire_time=True, tag=True) + assert key == 500000000000000 + assert value == 'test' + assert expire_time is None + assert tag is None + assert len(cache) == 1 (key, value), expire_time, tag = cache.pull(expire_time=True, tag=True) assert key == 500000000000000 assert value == 'test' @@ -1139,7 +904,6 @@ def test_push_pull_extras(cache): assert len(cache.check()) == 0 -@setup_cache def test_push_pull_expire(cache): cache.push(0, expire=0.1) cache.push(0, expire=0.1) @@ -1151,16 +915,33 @@ def test_push_pull_expire(cache): assert len(cache.check()) == 0 -@setup_cache +def test_push_peek_expire(cache): + cache.push(0, expire=0.1) + cache.push(0, expire=0.1) + cache.push(0, expire=0.1) + cache.push(1) + time.sleep(0.2) + assert cache.peek() == (500000000000003, 1) + assert len(cache) == 1 + assert len(cache.check()) == 0 + + def test_push_pull_large_value(cache): - value = b'test' * (2 ** 20) + value = b'test' * (2**20) cache.push(value) assert cache.pull() == (500000000000000, value) assert len(cache) == 0 assert len(cache.check()) == 0 -@setup_cache +def test_push_peek_large_value(cache): + value = b'test' * (2**20) + cache.push(value) + assert cache.peek() == (500000000000000, value) + assert len(cache) == 1 + assert len(cache.check()) == 0 + + def test_pull_ioerror(cache): assert cache.push(0) == 500000000000000 @@ -1179,9 +960,7 @@ def test_pull_ioerror(cache): assert cache.pull() == (None, None) -@setup_cache -@nt.raises(IOError) -def test_pull_ioerror_eacces(cache): +def test_peek_ioerror(cache): assert cache.push(0) == 500000000000000 disk = mock.Mock() @@ -1192,19 +971,69 @@ def test_pull_ioerror_eacces(cache): put.side_effect = [(0, True)] disk.fetch = fetch io_error = IOError() - io_error.errno = errno.EACCES - fetch.side_effect = io_error + io_error.errno = errno.ENOENT + fetch.side_effect = [io_error, 0] + + with mock.patch.object(cache, '_disk', disk): + _, value = cache.peek() + assert value == 0 + + +def test_peekitem_extras(cache): + with pytest.raises(KeyError): + cache.peekitem() + + assert cache.set('a', 0) + assert cache.set('b', 1) + assert cache.set('c', 2, expire=10, tag='foo') + assert cache.set('d', 3, expire=0.1) + assert cache.set('e', 4, expire=0.1) + + time.sleep(0.2) + + (key, value), expire_time, tag = cache.peekitem(expire_time=True, tag=True) + assert key == 'c' + assert value == 2 + assert expire_time > 0 + assert tag == 'foo' + + (key, value), expire_time = cache.peekitem(expire_time=True) + assert key == 'c' + assert value == 2 + assert expire_time > 0 + + (key, value), tag = cache.peekitem(tag=True) + assert key == 'c' + assert value == 2 + assert expire_time > 0 + assert tag == 'foo' + + +def test_peekitem_ioerror(cache): + assert cache.set('a', 0) + assert 
cache.set('b', 1) + assert cache.set('c', 2) + + disk = mock.Mock() + put = mock.Mock() + fetch = mock.Mock() + + disk.put = put + put.side_effect = [(0, True)] + disk.fetch = fetch + io_error = IOError() + io_error.errno = errno.ENOENT + fetch.side_effect = [io_error, 2] with mock.patch.object(cache, '_disk', disk): - cache.pull() + _, value = cache.peekitem() + assert value == 2 -@setup_cache def test_iterkeys(cache): assert list(cache.iterkeys()) == [] -@setup_cache def test_pickle(cache): for num, val in enumerate('abcde'): cache[val] = num @@ -1216,7 +1045,6 @@ def test_pickle(cache): assert other[key] == cache[key] -@setup_cache def test_pragmas(cache): results = [] @@ -1253,7 +1081,6 @@ def compare_pragmas(): assert all(results) -@setup_cache def test_size_limit_with_files(cache): cache.reset('cull_limit', 0) size_limit = 30 * cache.disk_min_file_size @@ -1268,7 +1095,6 @@ def test_size_limit_with_files(cache): assert cache.volume() <= size_limit -@setup_cache def test_size_limit_with_database(cache): cache.reset('cull_limit', 0) size_limit = 2 * cache.disk_min_file_size @@ -1284,7 +1110,6 @@ def test_size_limit_with_database(cache): assert cache.volume() <= size_limit -@setup_cache def test_cull_eviction_policy_none(cache): cache.reset('eviction_policy', 'none') size_limit = 2 * cache.disk_min_file_size @@ -1300,7 +1125,6 @@ def test_cull_eviction_policy_none(cache): assert cache.volume() > size_limit -@setup_cache def test_cull_size_limit_0(cache): cache.reset('cull_limit', 0) size_limit = 2 * cache.disk_min_file_size @@ -1316,8 +1140,6 @@ def test_cull_size_limit_0(cache): assert cache.volume() <= size_limit -@setup_cache -@nt.raises(dc.Timeout) def test_cull_timeout(cache): transact = mock.Mock() transact.side_effect = [dc.Timeout] @@ -1325,13 +1147,13 @@ def test_cull_timeout(cache): with mock.patch.object(cache, 'expire', lambda now: 0): with mock.patch.object(cache, 'volume', lambda: int(1e12)): with mock.patch.object(cache, '_transact', transact): - cache.cull() + with pytest.raises(dc.Timeout): + cache.cull() -@setup_cache def test_key_roundtrip(cache): - key_part_0 = u"part0" - key_part_1 = u"part1" + key_part_0 = 'part0' + key_part_1 = 'part1' to_test = [ (key_part_0, key_part_1), [key_part_0, key_part_1], @@ -1349,11 +1171,12 @@ def test_key_roundtrip(cache): def test_constant(): import diskcache.core + assert repr(diskcache.core.ENOVAL) == 'ENOVAL' def test_copy(): - cache_dir1 = op.join('tmp', 'foo') + cache_dir1 = tempfile.mkdtemp() with dc.Cache(cache_dir1) as cache1: for count in range(10): @@ -1362,7 +1185,8 @@ def test_copy(): for count in range(10, 20): cache1[count] = str(count) * int(1e5) - cache_dir2 = op.join('tmp', 'bar') + cache_dir2 = tempfile.mkdtemp() + shutil.rmtree(cache_dir2) shutil.copytree(cache_dir1, cache_dir2) with dc.Cache(cache_dir2) as cache2: @@ -1372,7 +1196,8 @@ def test_copy(): for count in range(10, 20): assert cache2[count] == str(count) * int(1e5) - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache_dir1, ignore_errors=True) + shutil.rmtree(cache_dir2, ignore_errors=True) def run(command): @@ -1392,8 +1217,8 @@ def test_rsync(): return # No rsync installed. Skip test. rsync_args = ['rsync', '-a', '--checksum', '--delete', '--stats'] - cache_dir1 = op.join('tmp', 'foo') + os.sep - cache_dir2 = op.join('tmp', 'bar') + os.sep + cache_dir1 = tempfile.mkdtemp() + os.sep + cache_dir2 = tempfile.mkdtemp() + os.sep # Store some items in cache_dir1. 
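
`peekitem`, added to the tests above, returns the most recently stored, non-expired `(key, value)` pair without removing it, and accepts the same `expire_time`/`tag` extras as `get`. A small sketch of the call shapes those tests rely on:

```python
import diskcache as dc

with dc.Cache() as cache:
    cache.set('a', 0)
    cache.set('b', 1)
    cache.set('c', 2, expire=10, tag='foo')

    # Most recently stored item, left in place.
    key, value = cache.peekitem()
    assert (key, value) == ('c', 2)

    # Optional extras mirror get(): expire time and tag.
    (key, value), expire_time, tag = cache.peekitem(expire_time=True, tag=True)
    assert tag == 'foo' and expire_time > 0
```
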
@@ -1445,10 +1270,10 @@ def test_rsync(): for count in range(300, 400): assert cache1[count] == str(count) * int(1e5) - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache_dir1, ignore_errors=True) + shutil.rmtree(cache_dir2, ignore_errors=True) -@setup_cache def test_custom_eviction_policy(cache): dc.EVICTION_POLICY['lru-gt-1s'] = { 'init': ( @@ -1485,7 +1310,6 @@ def test_custom_eviction_policy(cache): assert cache.volume() < size_limit -@setup_cache def test_lru_incr(cache): cache.reset('eviction_policy', 'least-recently-used') cache.incr(0) @@ -1493,6 +1317,91 @@ def test_lru_incr(cache): assert cache[0] == 0 -if __name__ == '__main__': - import nose - nose.runmodule() +def test_memoize(cache): + count = 1000 + + def fibiter(num): + alpha, beta = 0, 1 + + for _ in range(num): + alpha, beta = beta, alpha + beta + + return alpha + + @cache.memoize() + def fibrec(num): + if num == 0: + return 0 + elif num == 1: + return 1 + else: + return fibrec(num - 1) + fibrec(num - 2) + + cache.stats(enable=True) + + for value in range(count): + assert fibrec(value) == fibiter(value) + + hits1, misses1 = cache.stats() + + for value in range(count): + assert fibrec(value) == fibiter(value) + + hits2, misses2 = cache.stats() + + assert hits2 == (hits1 + count) + assert misses2 == misses1 + + +def test_memoize_kwargs(cache): + @cache.memoize(typed=True) + def foo(*args, **kwargs): + return args, kwargs + + assert foo(1, 2, 3, a=4, b=5) == ((1, 2, 3), {'a': 4, 'b': 5}) + + +def test_cleanup_dirs(cache): + value = b'\0' * 2**20 + start_count = len(os.listdir(cache.directory)) + for i in range(10): + cache[i] = value + set_count = len(os.listdir(cache.directory)) + assert set_count > start_count + for i in range(10): + del cache[i] + del_count = len(os.listdir(cache.directory)) + assert start_count == del_count + + +def test_disk_write_os_error(cache): + func = mock.Mock(side_effect=[OSError] * 10) + with mock.patch('diskcache.core.open', func): + with pytest.raises(OSError): + cache[0] = '\0' * 2**20 + + +def test_memoize_ignore(cache): + @cache.memoize(ignore={1, 'arg1'}) + def test(*args, **kwargs): + return args, kwargs + + cache.stats(enable=True) + assert test('a', 'b', 'c', arg0='d', arg1='e', arg2='f') + assert test('a', 'w', 'c', arg0='d', arg1='x', arg2='f') + assert test('a', 'y', 'c', arg0='d', arg1='z', arg2='f') + assert cache.stats() == (2, 1) + + +def test_memoize_iter(cache): + @cache.memoize() + def test(*args, **kwargs): + return sum(args) + sum(kwargs.values()) + + cache.clear() + assert test(1, 2, 3) + assert test(a=1, b=2, c=3) + assert test(-1, 0, 1, a=1, b=2, c=3) + assert len(cache) == 3 + for key in cache: + assert cache[key] == 6 diff --git a/tests/test_deque.py b/tests/test_deque.py index 14329d1..f997a86 100644 --- a/tests/test_deque.py +++ b/tests/test_deque.py @@ -1,10 +1,11 @@ -"Test diskcache.persistent.Deque." 
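
Before the `Deque` tests, the new memoize tests above cover `Cache.memoize`, a decorator that caches a function's results keyed by its arguments; `typed=True` and `ignore=...` appear in the diff. A compact usage sketch (the Fibonacci and fetch functions are illustrative examples, not project code):

```python
import diskcache as dc

cache = dc.Cache()


@cache.memoize()
def fib(num):
    # Recursive calls hit the cache for already-computed values.
    return num if num < 2 else fib(num - 1) + fib(num - 2)


@cache.memoize(ignore={0, 'url'})
def fetch(url, retries=3):
    # The first positional argument / 'url' keyword is excluded from the cache key.
    return (url, retries)


assert fib(100) == 354224848179261915075
```
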
+"""Test diskcache.persistent.Deque.""" -import functools as ft -import mock -import nose.tools as nt import pickle import shutil +import tempfile +from unittest import mock + +import pytest import diskcache as dc from diskcache.core import ENOVAL @@ -17,21 +18,15 @@ def rmdir(directory): pass -def setup_deque(func): - @ft.wraps(func) - def wrapper(): - deque = dc.Deque() - try: - func(deque) - except Exception: - rmdir(deque.directory) - raise - - return wrapper +@pytest.fixture +def deque(): + deque = dc.Deque() + yield deque + rmdir(deque.directory) def test_init(): - directory = '/tmp/diskcache/deque' + directory = tempfile.mkdtemp() sequence = list('abcde') deque = dc.Deque(sequence, None) @@ -55,7 +50,6 @@ def test_init(): rmdir(directory) -@setup_deque def test_getsetdel(deque): sequence = list('abcde') assert len(deque) == 0 @@ -83,24 +77,82 @@ def test_getsetdel(deque): assert len(deque) == 0 -@setup_deque +def test_append(deque): + deque.maxlen = 3 + for item in 'abcde': + deque.append(item) + assert deque == 'cde' + + +def test_appendleft(deque): + deque.maxlen = 3 + for item in 'abcde': + deque.appendleft(item) + assert deque == 'edc' + + +def test_index_positive(deque): + cache = mock.MagicMock() + cache.__len__.return_value = 3 + cache.iterkeys.return_value = ['a', 'b', 'c'] + cache.__getitem__.side_effect = [KeyError, 101, 102] + with mock.patch.object(deque, '_cache', cache): + assert deque[0] == 101 + + +def test_index_negative(deque): + cache = mock.MagicMock() + cache.__len__.return_value = 3 + cache.iterkeys.return_value = ['c', 'b', 'a'] + cache.__getitem__.side_effect = [KeyError, 101, 100] + with mock.patch.object(deque, '_cache', cache): + assert deque[-1] == 101 + + +def test_index_out_of_range(deque): + cache = mock.MagicMock() + cache.__len__.return_value = 3 + cache.iterkeys.return_value = ['a', 'b', 'c'] + cache.__getitem__.side_effect = [KeyError] * 3 + with mock.patch.object(deque, '_cache', cache): + with pytest.raises(IndexError): + deque[0] + + +def test_iter_keyerror(deque): + cache = mock.MagicMock() + cache.iterkeys.return_value = ['a', 'b', 'c'] + cache.__getitem__.side_effect = [KeyError, 101, 102] + with mock.patch.object(deque, '_cache', cache): + assert list(iter(deque)) == [101, 102] + + def test_reversed(deque): sequence = list('abcde') deque += sequence assert list(reversed(deque)) == list(reversed(sequence)) -@setup_deque +def test_reversed_keyerror(deque): + cache = mock.MagicMock() + cache.iterkeys.return_value = ['c', 'b', 'a'] + cache.__getitem__.side_effect = [KeyError, 101, 100] + with mock.patch.object(deque, '_cache', cache): + assert list(reversed(deque)) == [101, 100] + + def test_state(deque): sequence = list('abcde') deque.extend(sequence) assert deque == sequence + deque.maxlen = 3 + assert list(deque) == sequence[-3:] state = pickle.dumps(deque) values = pickle.loads(state) - assert values == sequence + assert values == sequence[-3:] + assert values.maxlen == 3 -@setup_deque def test_compare(deque): assert not (deque == {}) assert not (deque == [0]) @@ -110,113 +162,30 @@ def test_compare(deque): assert deque <= [1] -@nt.raises(IndexError) -@setup_deque def test_indexerror_negative(deque): - deque[-1] + with pytest.raises(IndexError): + deque[-1] -@nt.raises(IndexError) -@setup_deque def test_indexerror(deque): - deque[0] - - -@nt.raises(IndexError) -@setup_deque -def test_indexerror_islice(deque): - islice = mock.Mock(side_effect=StopIteration) - - deque.append(0) - - with mock.patch('diskcache.persistent.islice', islice): - 
deque[0] - - -@setup_deque -def test_get_timeout(deque): - cache = mock.MagicMock() - cache.__len__.return_value = 1 - cache.iterkeys.side_effect = [iter([0]), iter([0])] - cache.__getitem__.side_effect = [dc.Timeout, 0] - - deque.append(0) - - with mock.patch.object(deque, '_cache', cache): + with pytest.raises(IndexError): deque[0] -@setup_deque -def test_set_timeout(deque): - cache = mock.MagicMock() - cache.__len__.return_value = 1 - cache.iterkeys.side_effect = [iter([0]), iter([0])] - cache.__setitem__.side_effect = [dc.Timeout, None] - - deque.append(0) - - with mock.patch.object(deque, '_cache', cache): - deque[0] = 0 - - -@setup_deque -def test_del_timeout(deque): - cache = mock.MagicMock() - cache.__len__.return_value = 1 - cache.iterkeys.side_effect = [iter([0]), iter([0])] - cache.__delitem__.side_effect = [dc.Timeout, None] - - deque.append(0) - - with mock.patch.object(deque, '_cache', cache): - del deque[0] - - def test_repr(): - directory = '/tmp/diskcache/deque' + directory = tempfile.mkdtemp() deque = dc.Deque(directory=directory) assert repr(deque) == 'Deque(directory=%r)' % directory -@setup_deque -def test_iter_timeout(deque): - cache = mock.MagicMock() - cache.iterkeys.side_effect = [iter([0, 1])] - cache.__getitem__.side_effect = [dc.Timeout, 0] - - with mock.patch.object(deque, '_cache', cache): - assert list(deque) == [0] - - -@setup_deque -def test_reversed_timeout(deque): - cache = mock.MagicMock() - cache.iterkeys.side_effect = [iter([0, 1])] - cache.__getitem__.side_effect = [dc.Timeout, 0] - - with mock.patch.object(deque, '_cache', cache): - assert list(reversed(deque)) == [0] - - -@setup_deque -def test_append_timeout(deque): - cache = mock.MagicMock() - cache.push.side_effect = [dc.Timeout, None] - - with mock.patch.object(deque, '_cache', cache): - deque.append(0) - - -@setup_deque -def test_appendleft_timeout(deque): - cache = mock.MagicMock() - cache.push.side_effect = [dc.Timeout, None] - - with mock.patch.object(deque, '_cache', cache): - deque.appendleft(0) +def test_copy(deque): + sequence = list('abcde') + deque.extend(sequence) + temp = deque.copy() + assert deque == sequence + assert temp == sequence -@setup_deque def test_count(deque): deque += 'abbcccddddeeeee' @@ -224,21 +193,18 @@ def test_count(deque): assert deque.count(value) == index -@setup_deque def test_extend(deque): sequence = list('abcde') deque.extend(sequence) assert deque == sequence -@setup_deque def test_extendleft(deque): sequence = list('abcde') deque.extendleft(sequence) assert deque == list(reversed(sequence)) -@setup_deque def test_pop(deque): sequence = list('abcde') deque.extend(sequence) @@ -247,22 +213,11 @@ def test_pop(deque): assert deque.pop() == sequence.pop() -@nt.raises(IndexError) -@setup_deque def test_pop_indexerror(deque): - deque.pop() - - -@setup_deque -def test_pop_timeout(deque): - cache = mock.MagicMock() - cache.pull.side_effect = [dc.Timeout, (None, 0)] - - with mock.patch.object(deque, '_cache', cache): - assert deque.pop() == 0 + with pytest.raises(IndexError): + deque.pop() -@setup_deque def test_popleft(deque): sequence = list('abcde') deque.extend(sequence) @@ -273,22 +228,11 @@ def test_popleft(deque): del sequence[0] -@nt.raises(IndexError) -@setup_deque def test_popleft_indexerror(deque): - deque.popleft() - - -@setup_deque -def test_popleft_timeout(deque): - cache = mock.MagicMock() - cache.pull.side_effect = [dc.Timeout, (None, 0)] - - with mock.patch.object(deque, '_cache', cache): - assert deque.popleft() == 0 + with 
pytest.raises(IndexError): + deque.popleft() -@setup_deque def test_remove(deque): deque.extend('abaca') deque.remove('a') @@ -299,37 +243,31 @@ def test_remove(deque): assert deque == 'bc' -@setup_deque -def test_remove_timeout(deque): - cache = mock.MagicMock() - cache.iterkeys.side_effect = [iter([0, 1, 2, 3, 4])] - cache.__getitem__.side_effect = [0, dc.Timeout, KeyError, 3, 3] - cache.__delitem__.side_effect = [KeyError, dc.Timeout, None] - - with mock.patch.object(deque, '_cache', cache): - deque.remove(3) +def test_remove_valueerror(deque): + with pytest.raises(ValueError): + deque.remove(0) -@nt.raises(ValueError) -@setup_deque -def test_remove_valueerror(deque): - deque.remove(0) +def test_remove_keyerror(deque): + cache = mock.MagicMock() + cache.iterkeys.return_value = ['a', 'b', 'c'] + cache.__getitem__.side_effect = [KeyError, 100, 100] + cache.__delitem__.side_effect = [KeyError, None] + with mock.patch.object(deque, '_cache', cache): + deque.remove(100) -@setup_deque def test_reverse(deque): deque += 'abcde' deque.reverse() assert deque == 'edcba' -@nt.raises(TypeError) -@setup_deque def test_rotate_typeerror(deque): - deque.rotate(0.5) + with pytest.raises(TypeError): + deque.rotate(0.5) -@setup_deque def test_rotate(deque): deque.rotate(1) deque.rotate(-1) @@ -338,14 +276,12 @@ def test_rotate(deque): assert deque == 'cdeab' -@setup_deque def test_rotate_negative(deque): deque += 'abcde' deque.rotate(-2) assert deque == 'cdeab' -@setup_deque def test_rotate_indexerror(deque): deque += 'abc' @@ -357,7 +293,6 @@ def test_rotate_indexerror(deque): deque.rotate(1) -@setup_deque def test_rotate_indexerror_negative(deque): deque += 'abc' @@ -369,10 +304,11 @@ def test_rotate_indexerror_negative(deque): deque.rotate(-1) -@setup_deque -def test_clear_timeout(deque): - cache = mock.MagicMock() - cache.clear.side_effect = [dc.Timeout, None] - - with mock.patch.object(deque, '_cache', cache): - deque.clear() +def test_peek(deque): + value = b'x' * 100_000 + deque.append(value) + assert len(deque) == 1 + assert deque.peek() == value + assert len(deque) == 1 + assert deque.peek() == value + assert len(deque) == 1 diff --git a/tests/test_djangocache.py b/tests/test_djangocache.py index b8e554f..734ba1b 100644 --- a/tests/test_djangocache.py +++ b/tests/test_djangocache.py @@ -1,53 +1,24 @@ -# -*- coding: utf-8 -*- - # Most of this file was copied from: -# https://raw.githubusercontent.com/django/django/1.11.12/tests/cache/tests.py +# https://raw.githubusercontent.com/django/django/stable/3.2.x/tests/cache/tests.py # Unit tests for cache framework # Uses whatever cache backend is set in the test settings file. 
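
The `Deque` tests above cover the bounded-length behavior of `maxlen`, the non-destructive `peek`, and `copy`. A short sketch of those operations as the tests exercise them (directories are created automatically when omitted):

```python
import diskcache as dc

deque = dc.Deque(list('abcde'))  # seed from any iterable
deque.maxlen = 3                 # older items are discarded to enforce the bound
assert list(deque) == ['c', 'd', 'e']

copy = deque.copy()              # independent copy of the current contents
assert list(copy) == ['c', 'd', 'e']

single = dc.Deque(['x'])
assert single.peek() == 'x'      # read an item without removing it
assert len(single) == 1
```
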
-from __future__ import unicode_literals - -import copy -import io import os -import re +import pickle import shutil import tempfile -import threading import time -import unittest -import warnings +from unittest import mock from django.conf import settings -from django.core import management, signals -from django.core.cache import ( - DEFAULT_CACHE_ALIAS, CacheKeyWarning, cache, caches, -) -from django.core.cache.utils import make_template_fragment_key -from django.db import close_old_connections, connection, connections -from django.http import ( - HttpRequest, HttpResponse, HttpResponseNotModified, StreamingHttpResponse, -) +from django.core.cache import CacheKeyWarning, cache, caches +from django.http import HttpResponse from django.middleware.cache import ( - CacheMiddleware, FetchFromCacheMiddleware, UpdateCacheMiddleware, -) -from django.middleware.csrf import CsrfViewMiddleware -from django.template import engines -from django.template.context_processors import csrf -from django.template.response import TemplateResponse -from django.test import ( - RequestFactory, SimpleTestCase, TestCase, TransactionTestCase, - ignore_warnings, mock, override_settings, + FetchFromCacheMiddleware, + UpdateCacheMiddleware, ) +from django.test import RequestFactory, TestCase, override_settings from django.test.signals import setting_changed -from django.utils import six, timezone, translation -from django.utils.cache import ( - get_cache_key, learn_cache_key, patch_cache_control, - patch_response_headers, patch_vary_headers, -) -from django.utils.deprecation import RemovedInDjango21Warning -from django.utils.encoding import force_text -from django.views.decorators.cache import cache_page ################################################################################ # Setup Django for models import. @@ -55,27 +26,13 @@ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tests.settings') -############################################################################ -# GrantJ 2017-03-27 Ignore deprecation warnings. Django's metaclass magic does -# not always play well with Python 3.6. Read -# http://stackoverflow.com/questions/41343263/ for details -############################################################################ - -import warnings -warnings.filterwarnings('ignore', category=DeprecationWarning) - import django + django.setup() from .models import Poll, expensive_calculation -try: # Use the same idiom as in cache backends - from django.utils.six.moves import cPickle as pickle -except ImportError: - import pickle - - # functions/classes for complex data type tests def f(): return 42 @@ -86,32 +43,37 @@ def m(n): return 24 -class Unpicklable(object): +class Unpicklable: def __getstate__(self): raise pickle.PickleError() +def empty_response(request): + return HttpResponse() + + +KEY_ERRORS_WITH_MEMCACHED_MSG = ( + 'Cache key contains characters that will cause errors if used with ' + 'memcached: %r' +) + + class UnpicklableType(object): # Unpicklable using the default pickling protocol on Python 2. 
- __slots__ = 'a', + __slots__ = ('a',) def custom_key_func(key, key_prefix, version): - "A customized cache key function" + """A customized cache key function""" return 'CUSTOM-' + '-'.join([key_prefix, str(version), key]) -def custom_key_func2(key, key_prefix, version): - "Another customized cache key function" - return '-'.join(['CUSTOM', key_prefix, str(version), key]) - - _caches_setting_base = { 'default': {}, 'prefix': {'KEY_PREFIX': 'cacheprefix{}'.format(os.getpid())}, 'v2': {'VERSION': 2}, 'custom_key': {'KEY_FUNCTION': custom_key_func}, - 'custom_key2': {'KEY_FUNCTION': custom_key_func2}, + 'custom_key2': {'KEY_FUNCTION': 'tests.test_djangocache.custom_key_func'}, 'cull': {'OPTIONS': {'MAX_ENTRIES': 30}}, 'zero_cull': {'OPTIONS': {'CULL_FREQUENCY': 0, 'MAX_ENTRIES': 30}}, } @@ -127,40 +89,52 @@ def caches_setting_for_tests(base=None, exclude=None, **params): # params -> _caches_setting_base -> base base = base or {} exclude = exclude or set() - setting = {k: base.copy() for k in _caches_setting_base.keys() if k not in exclude} + setting = { + k: base.copy() for k in _caches_setting_base if k not in exclude + } for key, cache_params in setting.items(): cache_params.update(_caches_setting_base[key]) cache_params.update(params) return setting -class BaseCacheTests(object): +class BaseCacheTests: # A common set of tests to apply to all cache backends + factory = RequestFactory() - def setUp(self): - self.factory = RequestFactory() + # RemovedInDjango41Warning: python-memcached doesn't support .get() with + # default. + supports_get_with_default = True + + # Some clients raise custom exceptions when .incr() or .decr() are called + # with a non-integer value. + incr_decr_type_error = TypeError def tearDown(self): cache.clear() def test_simple(self): # Simple cache set/get works - cache.set("key", "value") - self.assertEqual(cache.get("key"), "value") + cache.set('key', 'value') + self.assertEqual(cache.get('key'), 'value') + + def test_default_used_when_none_is_set(self): + """If None is cached, get() returns it instead of the default.""" + cache.set('key_default_none', None) + self.assertIsNone(cache.get('key_default_none', default='default')) def test_add(self): # A key can be added to a cache - cache.add("addkey1", "value") - result = cache.add("addkey1", "newvalue") - self.assertFalse(result) - self.assertEqual(cache.get("addkey1"), "value") + self.assertIs(cache.add('addkey1', 'value'), True) + self.assertIs(cache.add('addkey1', 'newvalue'), False) + self.assertEqual(cache.get('addkey1'), 'value') def test_prefix(self): # Test for same cache key conflicts between shared backend cache.set('somekey', 'value') # should not be set in the prefixed cache - self.assertFalse(caches['prefix'].has_key('somekey')) + self.assertIs(caches['prefix'].has_key('somekey'), False) caches['prefix'].set('somekey', 'value2') @@ -168,42 +142,57 @@ def test_prefix(self): self.assertEqual(caches['prefix'].get('somekey'), 'value2') def test_non_existent(self): - # Non-existent cache keys return as None/default - # get with non-existent keys - self.assertIsNone(cache.get("does_not_exist")) - self.assertEqual(cache.get("does_not_exist", "bang!"), "bang!") + """Nonexistent cache keys return as None/default.""" + self.assertIsNone(cache.get('does_not_exist')) + self.assertEqual(cache.get('does_not_exist', 'bang!'), 'bang!') def test_get_many(self): # Multiple cache keys can be returned using get_many - cache.set('a', 'a') - cache.set('b', 'b') - cache.set('c', 'c') - cache.set('d', 'd') - 
self.assertDictEqual(cache.get_many(['a', 'c', 'd']), {'a': 'a', 'c': 'c', 'd': 'd'}) - self.assertDictEqual(cache.get_many(['a', 'b', 'e']), {'a': 'a', 'b': 'b'}) + cache.set_many({'a': 'a', 'b': 'b', 'c': 'c', 'd': 'd'}) + self.assertEqual( + cache.get_many(['a', 'c', 'd']), {'a': 'a', 'c': 'c', 'd': 'd'} + ) + self.assertEqual(cache.get_many(['a', 'b', 'e']), {'a': 'a', 'b': 'b'}) + self.assertEqual( + cache.get_many(iter(['a', 'b', 'e'])), {'a': 'a', 'b': 'b'} + ) + cache.set_many({'x': None, 'y': 1}) + self.assertEqual(cache.get_many(['x', 'y']), {'x': None, 'y': 1}) def test_delete(self): # Cache keys can be deleted - cache.set("key1", "spam") - cache.set("key2", "eggs") - self.assertEqual(cache.get("key1"), "spam") - cache.delete("key1") - self.assertIsNone(cache.get("key1")) - self.assertEqual(cache.get("key2"), "eggs") + cache.set_many({'key1': 'spam', 'key2': 'eggs'}) + self.assertEqual(cache.get('key1'), 'spam') + self.assertIs(cache.delete('key1'), True) + self.assertIsNone(cache.get('key1')) + self.assertEqual(cache.get('key2'), 'eggs') + + def test_delete_nonexistent(self): + self.assertIs(cache.delete('nonexistent_key'), False) def test_has_key(self): # The cache can be inspected for cache keys - cache.set("hello1", "goodbye1") - self.assertTrue(cache.has_key("hello1")) - self.assertFalse(cache.has_key("goodbye1")) - cache.set("no_expiry", "here", None) - self.assertTrue(cache.has_key("no_expiry")) + cache.set('hello1', 'goodbye1') + self.assertIs(cache.has_key('hello1'), True) + self.assertIs(cache.has_key('goodbye1'), False) + cache.set('no_expiry', 'here', None) + self.assertIs(cache.has_key('no_expiry'), True) + cache.set('null', None) + self.assertIs( + cache.has_key('null'), + True if self.supports_get_with_default else False, + ) def test_in(self): # The in operator can be used to inspect cache contents - cache.set("hello2", "goodbye2") - self.assertIn("hello2", cache) - self.assertNotIn("goodbye2", cache) + cache.set('hello2', 'goodbye2') + self.assertIn('hello2', cache) + self.assertNotIn('goodbye2', cache) + cache.set('null', None) + if self.supports_get_with_default: + self.assertIn('null', cache) + else: + self.assertNotIn('null', cache) def test_incr(self): # Cache values can be incremented @@ -215,6 +204,9 @@ def test_incr(self): self.assertEqual(cache.incr('answer', -10), 42) with self.assertRaises(ValueError): cache.incr('does_not_exist') + cache.set('null', None) + with self.assertRaises(self.incr_decr_type_error): + cache.incr('null') def test_decr(self): # Cache values can be decremented @@ -226,6 +218,9 @@ def test_decr(self): self.assertEqual(cache.decr('answer', -10), 42) with self.assertRaises(ValueError): cache.decr('does_not_exist') + cache.set('null', None) + with self.assertRaises(self.incr_decr_type_error): + cache.decr('null') def test_close(self): self.assertTrue(hasattr(cache, 'close')) @@ -242,14 +237,14 @@ def test_data_types(self): 'function': f, 'class': C, } - cache.set("stuff", stuff) - self.assertEqual(cache.get("stuff"), stuff) + cache.set('stuff', stuff) + self.assertEqual(cache.get('stuff'), stuff) def test_cache_read_for_model_instance(self): # Don't want fields with callable as default to be called on cache read expensive_calculation.num_runs = 0 Poll.objects.all().delete() - my_poll = Poll.objects.create(question="Well?") + my_poll = Poll.objects.create(question='Well?') self.assertEqual(Poll.objects.count(), 1) pub_date = my_poll.pub_date cache.set('question', my_poll) @@ -262,7 +257,7 @@ def 
test_cache_write_for_model_instance_with_deferred(self): # Don't want fields with callable as default to be called on cache write expensive_calculation.num_runs = 0 Poll.objects.all().delete() - Poll.objects.create(question="What?") + Poll.objects.create(question='What?') self.assertEqual(expensive_calculation.num_runs, 1) defer_qs = Poll.objects.all().defer('question') self.assertEqual(defer_qs.count(), 1) @@ -275,7 +270,7 @@ def test_cache_read_for_model_instance_with_deferred(self): # Don't want fields with callable as default to be called on cache read expensive_calculation.num_runs = 0 Poll.objects.all().delete() - Poll.objects.create(question="What?") + Poll.objects.create(question='What?') self.assertEqual(expensive_calculation.num_runs, 1) defer_qs = Poll.objects.all().defer('question') self.assertEqual(defer_qs.count(), 1) @@ -284,7 +279,9 @@ def test_cache_read_for_model_instance_with_deferred(self): runs_before_cache_read = expensive_calculation.num_runs cache.get('deferred_queryset') # We only want the default expensive calculation run on creation and set - self.assertEqual(expensive_calculation.num_runs, runs_before_cache_read) + self.assertEqual( + expensive_calculation.num_runs, runs_before_cache_read + ) def test_expiration(self): # Cache values can be set to expire @@ -293,11 +290,27 @@ def test_expiration(self): cache.set('expire3', 'very quickly', 1) time.sleep(2) - self.assertIsNone(cache.get("expire1")) + self.assertIsNone(cache.get('expire1')) - cache.add("expire2", "newvalue") - self.assertEqual(cache.get("expire2"), "newvalue") - self.assertFalse(cache.has_key("expire3")) + self.assertIs(cache.add('expire2', 'newvalue'), True) + self.assertEqual(cache.get('expire2'), 'newvalue') + self.assertIs(cache.has_key('expire3'), False) + + def test_touch(self): + # cache.touch() updates the timeout. + cache.set('expire1', 'very quickly', timeout=1) + self.assertIs(cache.touch('expire1', timeout=4), True) + time.sleep(2) + self.assertIs(cache.has_key('expire1'), True) + time.sleep(3) + self.assertIs(cache.has_key('expire1'), False) + # cache.touch() works without the timeout argument. 
+ cache.set('expire1', 'very quickly', timeout=1) + self.assertIs(cache.touch('expire1'), True) + time.sleep(2) + self.assertIs(cache.has_key('expire1'), True) + + self.assertIs(cache.touch('nonexistent'), False) def test_unicode(self): # Unicode values can be cached @@ -305,29 +318,33 @@ def test_unicode(self): 'ascii': 'ascii_value', 'unicode_ascii': 'Iñtërnâtiônàlizætiøn1', 'Iñtërnâtiônàlizætiøn': 'Iñtërnâtiônàlizætiøn2', - 'ascii2': {'x': 1} + 'ascii2': {'x': 1}, } # Test `set` for (key, value) in stuff.items(): - cache.set(key, value) - self.assertEqual(cache.get(key), value) + with self.subTest(key=key): + cache.set(key, value) + self.assertEqual(cache.get(key), value) # Test `add` for (key, value) in stuff.items(): - cache.delete(key) - cache.add(key, value) - self.assertEqual(cache.get(key), value) + with self.subTest(key=key): + self.assertIs(cache.delete(key), True) + self.assertIs(cache.add(key, value), True) + self.assertEqual(cache.get(key), value) # Test `set_many` for (key, value) in stuff.items(): - cache.delete(key) + self.assertIs(cache.delete(key), True) cache.set_many(stuff) for (key, value) in stuff.items(): - self.assertEqual(cache.get(key), value) + with self.subTest(key=key): + self.assertEqual(cache.get(key), value) def test_binary_string(self): # Binary strings should be cacheable from zlib import compress, decompress + value = 'value_to_be_compressed' compressed_value = compress(value.encode()) @@ -338,7 +355,7 @@ def test_binary_string(self): self.assertEqual(value, decompress(compressed_result).decode()) # Test add - cache.add('binary1-add', compressed_value) + self.assertIs(cache.add('binary1-add', compressed_value), True) compressed_result = cache.get('binary1-add') self.assertEqual(compressed_value, compressed_result) self.assertEqual(value, decompress(compressed_result).decode()) @@ -351,48 +368,53 @@ def test_binary_string(self): def test_set_many(self): # Multiple keys can be set using set_many - cache.set_many({"key1": "spam", "key2": "eggs"}) - self.assertEqual(cache.get("key1"), "spam") - self.assertEqual(cache.get("key2"), "eggs") + cache.set_many({'key1': 'spam', 'key2': 'eggs'}) + self.assertEqual(cache.get('key1'), 'spam') + self.assertEqual(cache.get('key2'), 'eggs') + + def test_set_many_returns_empty_list_on_success(self): + """set_many() returns an empty list when all keys are inserted.""" + failing_keys = cache.set_many({'key1': 'spam', 'key2': 'eggs'}) + self.assertEqual(failing_keys, []) def test_set_many_expiration(self): # set_many takes a second ``timeout`` parameter - cache.set_many({"key1": "spam", "key2": "eggs"}, 1) + cache.set_many({'key1': 'spam', 'key2': 'eggs'}, 1) time.sleep(2) - self.assertIsNone(cache.get("key1")) - self.assertIsNone(cache.get("key2")) + self.assertIsNone(cache.get('key1')) + self.assertIsNone(cache.get('key2')) def test_delete_many(self): # Multiple keys can be deleted using delete_many - cache.set("key1", "spam") - cache.set("key2", "eggs") - cache.set("key3", "ham") - cache.delete_many(["key1", "key2"]) - self.assertIsNone(cache.get("key1")) - self.assertIsNone(cache.get("key2")) - self.assertEqual(cache.get("key3"), "ham") + cache.set_many({'key1': 'spam', 'key2': 'eggs', 'key3': 'ham'}) + cache.delete_many(['key1', 'key2']) + self.assertIsNone(cache.get('key1')) + self.assertIsNone(cache.get('key2')) + self.assertEqual(cache.get('key3'), 'ham') def test_clear(self): # The cache can be emptied using clear - cache.set("key1", "spam") - cache.set("key2", "eggs") + cache.set_many({'key1': 'spam', 'key2': 
'eggs'}) cache.clear() - self.assertIsNone(cache.get("key1")) - self.assertIsNone(cache.get("key2")) + self.assertIsNone(cache.get('key1')) + self.assertIsNone(cache.get('key2')) def test_long_timeout(self): """ - Followe memcached's convention where a timeout greater than 30 days is + Follow memcached's convention where a timeout greater than 30 days is treated as an absolute expiration timestamp instead of a relative offset (#12399). """ cache.set('key1', 'eggs', 60 * 60 * 24 * 30 + 1) # 30 days + 1 second self.assertEqual(cache.get('key1'), 'eggs') - cache.add('key2', 'ham', 60 * 60 * 24 * 30 + 1) + self.assertIs(cache.add('key2', 'ham', 60 * 60 * 24 * 30 + 1), True) self.assertEqual(cache.get('key2'), 'ham') - cache.set_many({'key3': 'sausage', 'key4': 'lobster bisque'}, 60 * 60 * 24 * 30 + 1) + cache.set_many( + {'key3': 'sausage', 'key4': 'lobster bisque'}, + 60 * 60 * 24 * 30 + 1, + ) self.assertEqual(cache.get('key3'), 'sausage') self.assertEqual(cache.get('key4'), 'lobster bisque') @@ -403,16 +425,20 @@ def test_forever_timeout(self): cache.set('key1', 'eggs', None) self.assertEqual(cache.get('key1'), 'eggs') - cache.add('key2', 'ham', None) + self.assertIs(cache.add('key2', 'ham', None), True) self.assertEqual(cache.get('key2'), 'ham') - added = cache.add('key1', 'new eggs', None) - self.assertIs(added, False) + self.assertIs(cache.add('key1', 'new eggs', None), False) self.assertEqual(cache.get('key1'), 'eggs') cache.set_many({'key3': 'sausage', 'key4': 'lobster bisque'}, None) self.assertEqual(cache.get('key3'), 'sausage') self.assertEqual(cache.get('key4'), 'lobster bisque') + cache.set('key5', 'belgian fries', timeout=1) + self.assertIs(cache.touch('key5', timeout=None), True) + time.sleep(2) + self.assertEqual(cache.get('key5'), 'belgian fries') + def test_zero_timeout(self): """ Passing in zero into timeout results in a value that is not cached @@ -420,19 +446,28 @@ def test_zero_timeout(self): cache.set('key1', 'eggs', 0) self.assertIsNone(cache.get('key1')) - cache.add('key2', 'ham', 0) + self.assertIs(cache.add('key2', 'ham', 0), True) self.assertIsNone(cache.get('key2')) cache.set_many({'key3': 'sausage', 'key4': 'lobster bisque'}, 0) self.assertIsNone(cache.get('key3')) self.assertIsNone(cache.get('key4')) + cache.set('key5', 'belgian fries', timeout=5) + self.assertIs(cache.touch('key5', timeout=0), True) + self.assertIsNone(cache.get('key5')) + def test_float_timeout(self): # Make sure a timeout given as a float doesn't crash anything. - cache.set("key1", "spam", 100.2) - self.assertEqual(cache.get("key1"), "spam") + cache.set('key1', 'spam', 100.2) + self.assertEqual(cache.get('key1'), 'spam') + + def _perform_cull_test(self, cull_cache_name, initial_count, final_count): + try: + cull_cache = caches[cull_cache_name] + except InvalidCacheBackendError: + self.skipTest("Culling isn't implemented.") - def _perform_cull_test(self, cull_cache, initial_count, final_count): # Create initial cache key entries. This will overflow the cache, # causing a cull. 
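
The timeout tests above pin down the semantics this backend follows: `timeout=None` stores a key until it is deleted, `timeout=0` effectively does not cache, and the new `touch()` can move a key between those states. A brief sketch against the Django cache API used here (the key names are illustrative, and a configured `CACHES` setting is assumed):

```python
from django.core.cache import cache

cache.set('token', 'abc123', timeout=1)     # would expire after one second
cache.touch('token', timeout=None)          # now kept until explicitly deleted
assert cache.get('token') == 'abc123'

cache.touch('token', timeout=0)             # a zero timeout expires it immediately
assert cache.get('token') is None

assert cache.touch('missing-key') is False  # touching an absent key reports failure
```
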
for i in range(1, initial_count): @@ -445,17 +480,31 @@ def _perform_cull_test(self, cull_cache, initial_count, final_count): self.assertEqual(count, final_count) def test_cull(self): - self._perform_cull_test(caches['cull'], 50, 29) + self._perform_cull_test('cull', 50, 29) def test_zero_cull(self): - self._perform_cull_test(caches['zero_cull'], 50, 19) + self._perform_cull_test('zero_cull', 50, 19) + + def test_cull_delete_when_store_empty(self): + try: + cull_cache = caches['cull'] + except InvalidCacheBackendError: + self.skipTest("Culling isn't implemented.") + old_max_entries = cull_cache._max_entries + # Force _cull to delete on first cached record. + cull_cache._max_entries = -1 + try: + cull_cache.set('force_cull_delete', 'value', 1000) + self.assertIs(cull_cache.has_key('force_cull_delete'), True) + finally: + cull_cache._max_entries = old_max_entries def _perform_invalid_key_test(self, key, expected_warning): """ - All the builtin backends (except memcached, see below) should warn on - keys that would be refused by memcached. This encourages portable - caching code without making it too difficult to use production backends - with more liberal key rules. Refs #6447. + All the builtin backends should warn (except memcached that should + error) on keys that would be refused by memcached. This encourages + portable caching code without making it too difficult to use production + backends with more liberal key rules. Refs #6447. """ # mimic custom ``make_key`` method being defined since the default will # never show the below warnings @@ -465,24 +514,33 @@ def func(key, *args): old_func = cache.key_func cache.key_func = func + tests = [ + ('add', [key, 1]), + ('get', [key]), + ('set', [key, 1]), + ('incr', [key]), + ('decr', [key]), + ('touch', [key]), + ('delete', [key]), + ('get_many', [[key, 'b']]), + ('set_many', [{key: 1, 'b': 2}]), + ('delete_many', [{key: 1, 'b': 2}]), + ] try: - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - cache.set(key, 'value') - self.assertEqual(len(w), 1) - self.assertIsInstance(w[0].message, CacheKeyWarning) - self.assertEqual(str(w[0].message.args[0]), expected_warning) + for operation, args in tests: + with self.subTest(operation=operation): + with self.assertWarns(CacheKeyWarning) as cm: + getattr(cache, operation)(*args) + self.assertEqual(str(cm.warning), expected_warning) finally: cache.key_func = old_func def test_invalid_key_characters(self): # memcached doesn't allow whitespace or control characters in keys. key = 'key with spaces and 清' - expected_warning = ( - "Cache key contains characters that will cause errors if used " - "with memcached: %r" % key + self._perform_invalid_key_test( + key, KEY_ERRORS_WITH_MEMCACHED_MSG % key ) - self._perform_invalid_key_test(key, expected_warning) def test_invalid_key_length(self): # memcached limits key length to 250. 
@@ -537,41 +595,41 @@ def test_cache_versioning_get_set(self): def test_cache_versioning_add(self): # add, default version = 1, but manually override version = 2 - cache.add('answer1', 42, version=2) + self.assertIs(cache.add('answer1', 42, version=2), True) self.assertIsNone(cache.get('answer1', version=1)) self.assertEqual(cache.get('answer1', version=2), 42) - cache.add('answer1', 37, version=2) + self.assertIs(cache.add('answer1', 37, version=2), False) self.assertIsNone(cache.get('answer1', version=1)) self.assertEqual(cache.get('answer1', version=2), 42) - cache.add('answer1', 37, version=1) + self.assertIs(cache.add('answer1', 37, version=1), True) self.assertEqual(cache.get('answer1', version=1), 37) self.assertEqual(cache.get('answer1', version=2), 42) # v2 add, using default version = 2 - caches['v2'].add('answer2', 42) + self.assertIs(caches['v2'].add('answer2', 42), True) self.assertIsNone(cache.get('answer2', version=1)) self.assertEqual(cache.get('answer2', version=2), 42) - caches['v2'].add('answer2', 37) + self.assertIs(caches['v2'].add('answer2', 37), False) self.assertIsNone(cache.get('answer2', version=1)) self.assertEqual(cache.get('answer2', version=2), 42) - caches['v2'].add('answer2', 37, version=1) + self.assertIs(caches['v2'].add('answer2', 37, version=1), True) self.assertEqual(cache.get('answer2', version=1), 37) self.assertEqual(cache.get('answer2', version=2), 42) # v2 add, default version = 2, but manually override version = 1 - caches['v2'].add('answer3', 42, version=1) + self.assertIs(caches['v2'].add('answer3', 42, version=1), True) self.assertEqual(cache.get('answer3', version=1), 42) self.assertIsNone(cache.get('answer3', version=2)) - caches['v2'].add('answer3', 37, version=1) + self.assertIs(caches['v2'].add('answer3', 37, version=1), False) self.assertEqual(cache.get('answer3', version=1), 42) self.assertIsNone(cache.get('answer3', version=2)) - caches['v2'].add('answer3', 37) + self.assertIs(caches['v2'].add('answer3', 37), True) self.assertEqual(cache.get('answer3', version=1), 42) self.assertEqual(cache.get('answer3', version=2), 37) @@ -579,116 +637,158 @@ def test_cache_versioning_has_key(self): cache.set('answer1', 42) # has_key - self.assertTrue(cache.has_key('answer1')) - self.assertTrue(cache.has_key('answer1', version=1)) - self.assertFalse(cache.has_key('answer1', version=2)) + self.assertIs(cache.has_key('answer1'), True) + self.assertIs(cache.has_key('answer1', version=1), True) + self.assertIs(cache.has_key('answer1', version=2), False) - self.assertFalse(caches['v2'].has_key('answer1')) - self.assertTrue(caches['v2'].has_key('answer1', version=1)) - self.assertFalse(caches['v2'].has_key('answer1', version=2)) + self.assertIs(caches['v2'].has_key('answer1'), False) + self.assertIs(caches['v2'].has_key('answer1', version=1), True) + self.assertIs(caches['v2'].has_key('answer1', version=2), False) def test_cache_versioning_delete(self): cache.set('answer1', 37, version=1) cache.set('answer1', 42, version=2) - cache.delete('answer1') + self.assertIs(cache.delete('answer1'), True) self.assertIsNone(cache.get('answer1', version=1)) self.assertEqual(cache.get('answer1', version=2), 42) cache.set('answer2', 37, version=1) cache.set('answer2', 42, version=2) - cache.delete('answer2', version=2) + self.assertIs(cache.delete('answer2', version=2), True) self.assertEqual(cache.get('answer2', version=1), 37) self.assertIsNone(cache.get('answer2', version=2)) cache.set('answer3', 37, version=1) cache.set('answer3', 42, version=2) - 
caches['v2'].delete('answer3') + self.assertIs(caches['v2'].delete('answer3'), True) self.assertEqual(cache.get('answer3', version=1), 37) self.assertIsNone(cache.get('answer3', version=2)) cache.set('answer4', 37, version=1) cache.set('answer4', 42, version=2) - caches['v2'].delete('answer4', version=1) + self.assertIs(caches['v2'].delete('answer4', version=1), True) self.assertIsNone(cache.get('answer4', version=1)) self.assertEqual(cache.get('answer4', version=2), 42) def test_cache_versioning_incr_decr(self): cache.set('answer1', 37, version=1) cache.set('answer1', 42, version=2) - cache.incr('answer1') + self.assertEqual(cache.incr('answer1'), 38) self.assertEqual(cache.get('answer1', version=1), 38) self.assertEqual(cache.get('answer1', version=2), 42) - cache.decr('answer1') + self.assertEqual(cache.decr('answer1'), 37) self.assertEqual(cache.get('answer1', version=1), 37) self.assertEqual(cache.get('answer1', version=2), 42) cache.set('answer2', 37, version=1) cache.set('answer2', 42, version=2) - cache.incr('answer2', version=2) + self.assertEqual(cache.incr('answer2', version=2), 43) self.assertEqual(cache.get('answer2', version=1), 37) self.assertEqual(cache.get('answer2', version=2), 43) - cache.decr('answer2', version=2) + self.assertEqual(cache.decr('answer2', version=2), 42) self.assertEqual(cache.get('answer2', version=1), 37) self.assertEqual(cache.get('answer2', version=2), 42) cache.set('answer3', 37, version=1) cache.set('answer3', 42, version=2) - caches['v2'].incr('answer3') + self.assertEqual(caches['v2'].incr('answer3'), 43) self.assertEqual(cache.get('answer3', version=1), 37) self.assertEqual(cache.get('answer3', version=2), 43) - caches['v2'].decr('answer3') + self.assertEqual(caches['v2'].decr('answer3'), 42) self.assertEqual(cache.get('answer3', version=1), 37) self.assertEqual(cache.get('answer3', version=2), 42) cache.set('answer4', 37, version=1) cache.set('answer4', 42, version=2) - caches['v2'].incr('answer4', version=1) + self.assertEqual(caches['v2'].incr('answer4', version=1), 38) self.assertEqual(cache.get('answer4', version=1), 38) self.assertEqual(cache.get('answer4', version=2), 42) - caches['v2'].decr('answer4', version=1) + self.assertEqual(caches['v2'].decr('answer4', version=1), 37) self.assertEqual(cache.get('answer4', version=1), 37) self.assertEqual(cache.get('answer4', version=2), 42) def test_cache_versioning_get_set_many(self): # set, using default version = 1 cache.set_many({'ford1': 37, 'arthur1': 42}) - self.assertDictEqual(cache.get_many(['ford1', 'arthur1']), {'ford1': 37, 'arthur1': 42}) - self.assertDictEqual(cache.get_many(['ford1', 'arthur1'], version=1), {'ford1': 37, 'arthur1': 42}) - self.assertDictEqual(cache.get_many(['ford1', 'arthur1'], version=2), {}) + self.assertEqual( + cache.get_many(['ford1', 'arthur1']), {'ford1': 37, 'arthur1': 42} + ) + self.assertEqual( + cache.get_many(['ford1', 'arthur1'], version=1), + {'ford1': 37, 'arthur1': 42}, + ) + self.assertEqual(cache.get_many(['ford1', 'arthur1'], version=2), {}) - self.assertDictEqual(caches['v2'].get_many(['ford1', 'arthur1']), {}) - self.assertDictEqual(caches['v2'].get_many(['ford1', 'arthur1'], version=1), {'ford1': 37, 'arthur1': 42}) - self.assertDictEqual(caches['v2'].get_many(['ford1', 'arthur1'], version=2), {}) + self.assertEqual(caches['v2'].get_many(['ford1', 'arthur1']), {}) + self.assertEqual( + caches['v2'].get_many(['ford1', 'arthur1'], version=1), + {'ford1': 37, 'arthur1': 42}, + ) + self.assertEqual( + caches['v2'].get_many(['ford1', 'arthur1'], 
version=2), {} + ) # set, default version = 1, but manually override version = 2 cache.set_many({'ford2': 37, 'arthur2': 42}, version=2) - self.assertDictEqual(cache.get_many(['ford2', 'arthur2']), {}) - self.assertDictEqual(cache.get_many(['ford2', 'arthur2'], version=1), {}) - self.assertDictEqual(cache.get_many(['ford2', 'arthur2'], version=2), {'ford2': 37, 'arthur2': 42}) + self.assertEqual(cache.get_many(['ford2', 'arthur2']), {}) + self.assertEqual(cache.get_many(['ford2', 'arthur2'], version=1), {}) + self.assertEqual( + cache.get_many(['ford2', 'arthur2'], version=2), + {'ford2': 37, 'arthur2': 42}, + ) - self.assertDictEqual(caches['v2'].get_many(['ford2', 'arthur2']), {'ford2': 37, 'arthur2': 42}) - self.assertDictEqual(caches['v2'].get_many(['ford2', 'arthur2'], version=1), {}) - self.assertDictEqual(caches['v2'].get_many(['ford2', 'arthur2'], version=2), {'ford2': 37, 'arthur2': 42}) + self.assertEqual( + caches['v2'].get_many(['ford2', 'arthur2']), + {'ford2': 37, 'arthur2': 42}, + ) + self.assertEqual( + caches['v2'].get_many(['ford2', 'arthur2'], version=1), {} + ) + self.assertEqual( + caches['v2'].get_many(['ford2', 'arthur2'], version=2), + {'ford2': 37, 'arthur2': 42}, + ) # v2 set, using default version = 2 caches['v2'].set_many({'ford3': 37, 'arthur3': 42}) - self.assertDictEqual(cache.get_many(['ford3', 'arthur3']), {}) - self.assertDictEqual(cache.get_many(['ford3', 'arthur3'], version=1), {}) - self.assertDictEqual(cache.get_many(['ford3', 'arthur3'], version=2), {'ford3': 37, 'arthur3': 42}) + self.assertEqual(cache.get_many(['ford3', 'arthur3']), {}) + self.assertEqual(cache.get_many(['ford3', 'arthur3'], version=1), {}) + self.assertEqual( + cache.get_many(['ford3', 'arthur3'], version=2), + {'ford3': 37, 'arthur3': 42}, + ) - self.assertDictEqual(caches['v2'].get_many(['ford3', 'arthur3']), {'ford3': 37, 'arthur3': 42}) - self.assertDictEqual(caches['v2'].get_many(['ford3', 'arthur3'], version=1), {}) - self.assertDictEqual(caches['v2'].get_many(['ford3', 'arthur3'], version=2), {'ford3': 37, 'arthur3': 42}) + self.assertEqual( + caches['v2'].get_many(['ford3', 'arthur3']), + {'ford3': 37, 'arthur3': 42}, + ) + self.assertEqual( + caches['v2'].get_many(['ford3', 'arthur3'], version=1), {} + ) + self.assertEqual( + caches['v2'].get_many(['ford3', 'arthur3'], version=2), + {'ford3': 37, 'arthur3': 42}, + ) # v2 set, default version = 2, but manually override version = 1 caches['v2'].set_many({'ford4': 37, 'arthur4': 42}, version=1) - self.assertDictEqual(cache.get_many(['ford4', 'arthur4']), {'ford4': 37, 'arthur4': 42}) - self.assertDictEqual(cache.get_many(['ford4', 'arthur4'], version=1), {'ford4': 37, 'arthur4': 42}) - self.assertDictEqual(cache.get_many(['ford4', 'arthur4'], version=2), {}) + self.assertEqual( + cache.get_many(['ford4', 'arthur4']), {'ford4': 37, 'arthur4': 42} + ) + self.assertEqual( + cache.get_many(['ford4', 'arthur4'], version=1), + {'ford4': 37, 'arthur4': 42}, + ) + self.assertEqual(cache.get_many(['ford4', 'arthur4'], version=2), {}) - self.assertDictEqual(caches['v2'].get_many(['ford4', 'arthur4']), {}) - self.assertDictEqual(caches['v2'].get_many(['ford4', 'arthur4'], version=1), {'ford4': 37, 'arthur4': 42}) - self.assertDictEqual(caches['v2'].get_many(['ford4', 'arthur4'], version=2), {}) + self.assertEqual(caches['v2'].get_many(['ford4', 'arthur4']), {}) + self.assertEqual( + caches['v2'].get_many(['ford4', 'arthur4'], version=1), + {'ford4': 37, 'arthur4': 42}, + ) + self.assertEqual( + caches['v2'].get_many(['ford4', 
'arthur4'], version=2), {} + ) def test_incr_version(self): cache.set('answer', 42, version=2) @@ -718,6 +818,13 @@ def test_incr_version(self): with self.assertRaises(ValueError): cache.incr_version('does_not_exist') + cache.set('null', None) + if self.supports_get_with_default: + self.assertEqual(cache.incr_version('null'), 2) + else: + with self.assertRaises(self.incr_decr_type_error): + cache.incr_version('null') + def test_decr_version(self): cache.set('answer', 42, version=2) self.assertIsNone(cache.get('answer')) @@ -742,6 +849,13 @@ def test_decr_version(self): with self.assertRaises(ValueError): cache.decr_version('does_not_exist', version=2) + cache.set('null', None, version=2) + if self.supports_get_with_default: + self.assertEqual(cache.decr_version('null', version=2), 1) + else: + with self.assertRaises(self.incr_decr_type_error): + cache.decr_version('null', version=2) + def test_custom_key_func(self): # Two caches with different key functions aren't visible to each other cache.set('answer1', 42) @@ -755,33 +869,34 @@ def test_custom_key_func(self): self.assertEqual(caches['custom_key2'].get('answer2'), 42) def test_cache_write_unpicklable_object(self): - update_middleware = UpdateCacheMiddleware() - update_middleware.cache = cache - - fetch_middleware = FetchFromCacheMiddleware() - fetch_middleware.cache = cache + fetch_middleware = FetchFromCacheMiddleware(empty_response) request = self.factory.get('/cache/test') request._cache_update_cache = True - get_cache_data = FetchFromCacheMiddleware().process_request(request) + get_cache_data = FetchFromCacheMiddleware( + empty_response + ).process_request(request) self.assertIsNone(get_cache_data) - response = HttpResponse() content = 'Testing cookie serialization.' - response.content = content - response.set_cookie('foo', 'bar') - update_middleware.process_response(request, response) + def get_response(req): + response = HttpResponse(content) + response.set_cookie('foo', 'bar') + return response + + update_middleware = UpdateCacheMiddleware(get_response) + response = update_middleware(request) get_cache_data = fetch_middleware.process_request(request) self.assertIsNotNone(get_cache_data) - self.assertEqual(get_cache_data.content, content.encode('utf-8')) + self.assertEqual(get_cache_data.content, content.encode()) self.assertEqual(get_cache_data.cookies, response.cookies) - update_middleware.process_response(request, get_cache_data) + UpdateCacheMiddleware(lambda req: get_cache_data)(request) get_cache_data = fetch_middleware.process_request(request) self.assertIsNotNone(get_cache_data) - self.assertEqual(get_cache_data.content, content.encode('utf-8')) + self.assertEqual(get_cache_data.content, content.encode()) self.assertEqual(get_cache_data.cookies, response.cookies) def test_add_fail_on_pickleerror(self): @@ -797,7 +912,12 @@ def test_get_or_set(self): self.assertIsNone(cache.get('projector')) self.assertEqual(cache.get_or_set('projector', 42), 42) self.assertEqual(cache.get('projector'), 42) - self.assertEqual(cache.get_or_set('null', None), None) + self.assertIsNone(cache.get_or_set('null', None)) + if self.supports_get_with_default: + # Previous get_or_set() stores None in the cache. 
+ self.assertIsNone(cache.get('null', 'default')) + else: + self.assertEqual(cache.get('null', 'default'), 'default') def test_get_or_set_callable(self): def my_callable(): @@ -806,16 +926,19 @@ def my_callable(): self.assertEqual(cache.get_or_set('mykey', my_callable), 'value') self.assertEqual(cache.get_or_set('mykey', my_callable()), 'value') - def test_get_or_set_callable_returning_none(self): - self.assertIsNone(cache.get_or_set('mykey', lambda: None)) - # Previous get_or_set() doesn't store None in the cache. - self.assertEqual(cache.get('mykey', 'default'), 'default') + self.assertIsNone(cache.get_or_set('null', lambda: None)) + if self.supports_get_with_default: + # Previous get_or_set() stores None in the cache. + self.assertIsNone(cache.get('null', 'default')) + else: + self.assertEqual(cache.get('null', 'default'), 'default') def test_get_or_set_version(self): - cache.get_or_set('brian', 1979, version=2) - with self.assertRaises(TypeError): + msg = "get_or_set() missing 1 required positional argument: 'default'" + self.assertEqual(cache.get_or_set('brian', 1979, version=2), 1979) + with self.assertRaisesMessage(TypeError, msg): cache.get_or_set('brian') - with self.assertRaises(TypeError): + with self.assertRaisesMessage(TypeError, msg): cache.get_or_set('brian', version=1) self.assertIsNone(cache.get('brian', version=1)) self.assertEqual(cache.get_or_set('brian', 42, version=1), 42) @@ -823,34 +946,37 @@ def test_get_or_set_version(self): self.assertIsNone(cache.get('brian', version=3)) def test_get_or_set_racing(self): - with mock.patch('%s.%s' % (settings.CACHES['default']['BACKEND'], 'add')) as cache_add: + with mock.patch( + '%s.%s' % (settings.CACHES['default']['BACKEND'], 'add') + ) as cache_add: # Simulate cache.add() failing to add a value. In that case, the # default value should be returned. cache_add.return_value = False self.assertEqual(cache.get_or_set('key', 'default'), 'default') -class PicklingSideEffect(object): - +class PicklingSideEffect: def __init__(self, cache): self.cache = cache self.locked = False def __getstate__(self): - if self.cache._lock.active_writers: - self.locked = True + self.locked = self.cache._lock.locked() return {} -@override_settings(CACHES=caches_setting_for_tests( - BACKEND='diskcache.DjangoCache', -)) +@override_settings( + CACHES=caches_setting_for_tests( + BACKEND='diskcache.DjangoCache', + ) +) class DiskCacheTests(BaseCacheTests, TestCase): - "Specific test cases for diskcache.DjangoCache." + """Specific test cases for diskcache.DjangoCache.""" + def setUp(self): - super(DiskCacheTests, self).setUp() + super().setUp() self.dirname = tempfile.mkdtemp() - # Cache location cannot be modified through override_settings / modify_settings, + # Caches location cannot be modified through override_settings / modify_settings, # hence settings are manipulated directly here and the setting_changed signal # is triggered manually. 
for cache_params in settings.CACHES.values(): @@ -858,7 +984,7 @@ def setUp(self): setting_changed.send(self.__class__, setting='CACHES', enter=False) def tearDown(self): - super(DiskCacheTests, self).tearDown() + super().tearDown() cache.close() shutil.rmtree(self.dirname, ignore_errors=True) @@ -867,14 +993,23 @@ def test_ignores_non_cache_files(self): with open(fname, 'w'): os.utime(fname, None) cache.clear() - self.assertTrue(os.path.exists(fname), - 'Expected cache.clear to ignore non cache files') + self.assertTrue( + os.path.exists(fname), + 'Expected cache.clear to ignore non cache files', + ) os.remove(fname) + def test_creates_cache_dir_if_nonexistent(self): + os.rmdir(self.dirname) + cache.set('foo', 'bar') + self.assertTrue(os.path.exists(self.dirname)) + def test_clear_does_not_remove_cache_dir(self): cache.clear() - self.assertTrue(os.path.exists(self.dirname), - 'Expected cache.clear to keep the cache dir') + self.assertTrue( + os.path.exists(self.dirname), + 'Expected cache.clear to keep the cache dir', + ) def test_cache_write_unpicklable_type(self): # This fails if not using the highest pickling protocol on Python 2. @@ -896,7 +1031,7 @@ def test_directory(self): self.assertTrue('tmp' in cache.directory) def test_read(self): - value = b'abcd' * 2 ** 20 + value = b'abcd' * 2**20 result = cache.set(b'test-key', value) self.assertTrue(result) @@ -941,21 +1076,15 @@ def test_pop(self): self.assertEqual(cache.pop(0, default=1), 1) self.assertEqual(cache.pop(1, expire_time=True), (1, None)) self.assertEqual(cache.pop(2, tag=True), (2, None)) - self.assertEqual(cache.pop(3, expire_time=True, tag=True), (3, None, None)) + self.assertEqual( + cache.pop(3, expire_time=True, tag=True), (3, None, None) + ) self.assertEqual(cache.pop(4, retry=False), 4) - def test_pickle(self): - letters = 'abcde' - cache.clear() - - for num, val in enumerate(letters): - cache.set(val, num) - - data = pickle.dumps(cache) - other = pickle.loads(data) - - for key in letters: - self.assertEqual(other.get(key), cache.get(key)) + def test_cache(self): + subcache = cache.cache('test') + directory = os.path.join(cache.directory, 'cache', 'test') + self.assertEqual(subcache.directory, directory) def test_deque(self): deque = cache.deque('test') @@ -968,6 +1097,12 @@ def test_index(self): self.assertEqual(index.directory, directory) def test_memoize(self): + with self.assertRaises(TypeError): + + @cache.memoize # <-- Missing parens! 
+ def test(): + pass + count = 1000 def fibiter(num): diff --git a/tests/test_doctest.py b/tests/test_doctest.py index ba8eb6b..822d8db 100644 --- a/tests/test_doctest.py +++ b/tests/test_doctest.py @@ -1,45 +1,37 @@ import doctest -import shutil import diskcache.core import diskcache.djangocache import diskcache.fanout -import diskcache.memo import diskcache.persistent - - -def rmdir(directory): - try: - shutil.rmtree(directory) - except OSError: - pass +import diskcache.recipes def test_core(): - rmdir('/tmp/diskcache') failures, _ = doctest.testmod(diskcache.core) assert failures == 0 def test_djangocache(): - rmdir('/tmp/diskcache') failures, _ = doctest.testmod(diskcache.djangocache) assert failures == 0 def test_fanout(): - rmdir('/tmp/diskcache') failures, _ = doctest.testmod(diskcache.fanout) assert failures == 0 -def test_memo(): - rmdir('/tmp/diskcache') - failures, _ = doctest.testmod(diskcache.memo) +def test_persistent(): + failures, _ = doctest.testmod(diskcache.persistent) assert failures == 0 -def test_persistent(): - rmdir('/tmp/diskcache') - failures, _ = doctest.testmod(diskcache.persistent) +def test_recipes(): + failures, _ = doctest.testmod(diskcache.recipes) + assert failures == 0 + + +def test_tutorial(): + failures, _ = doctest.testfile('../docs/tutorial.rst') assert failures == 0 diff --git a/tests/test_fanout.py b/tests/test_fanout.py index 6d43f95..af221b6 100644 --- a/tests/test_fanout.py +++ b/tests/test_fanout.py @@ -1,56 +1,41 @@ -"Test diskcache.fanout.FanoutCache." +"""Test diskcache.fanout.FanoutCache.""" -from __future__ import print_function - -import errno -import functools as ft +import collections as co import hashlib import io -import mock -import nose.tools as nt import os import os.path as op -import random +import pathlib +import pickle import shutil -import sqlite3 import subprocess as sp -import sys +import tempfile import threading import time import warnings +from unittest import mock -try: - import cPickle as pickle -except: - import pickle +import pytest import diskcache as dc warnings.simplefilter('error') warnings.simplefilter('ignore', category=dc.EmptyDirWarning) -if sys.hexversion < 0x03000000: - range = xrange -def setup_cache(func): - @ft.wraps(func) - def wrapper(): - shutil.rmtree('tmp', ignore_errors=True) - with dc.FanoutCache('tmp') as cache: - func(cache) - shutil.rmtree('tmp', ignore_errors=True) - return wrapper +@pytest.fixture +def cache(): + with dc.FanoutCache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) -@setup_cache def test_init(cache): - assert cache.directory == 'tmp' - default_settings = dc.DEFAULT_SETTINGS.copy() del default_settings['size_limit'] for key, value in default_settings.items(): assert getattr(cache, key) == value - assert cache.size_limit == 2 ** 27 + assert cache.size_limit == 2**27 cache.check() @@ -60,7 +45,13 @@ def test_init(cache): cache.check() -@setup_cache +def test_init_path(cache): + path = pathlib.Path(cache.directory) + other = dc.FanoutCache(path) + other.close() + assert cache.directory == other.directory + + def test_set_get_delete(cache): for value in range(100): cache.set(value, value) @@ -79,7 +70,7 @@ def test_set_get_delete(cache): for value in range(100): assert cache.delete(value) - assert cache.delete(100) == False + assert cache.delete(100) is False cache.check() @@ -99,7 +90,6 @@ def test_set_get_delete(cache): cache.check() -@setup_cache def test_set_timeout(cache): shards = mock.Mock() shard = mock.Mock() @@ -113,29 +103,32 @@ def 
test_set_timeout(cache): assert not cache.set(0, 0) -@setup_cache -def test_set_timeout_retry(cache): +def test_touch(cache): + assert cache.set(0, None, expire=60) + assert cache.touch(0, expire=None) + assert cache.touch(0, expire=0) + assert not cache.touch(0) + + +def test_touch_timeout(cache): shards = mock.Mock() shard = mock.Mock() - set_func = mock.Mock() + touch_func = mock.Mock() shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.set = set_func - set_func.side_effect = [dc.Timeout, True, dc.Timeout, True] + shard.touch = touch_func + touch_func.side_effect = dc.Timeout with mock.patch.object(cache, '_shards', shards): - assert cache.set(0, 0, retry=True) - cache[1] = 1 + assert not cache.touch(0) -@setup_cache def test_add(cache): assert cache.add(0, 0) assert not cache.add(0, 1) assert cache.get(0) == 0 -@setup_cache def test_add_timeout(cache): shards = mock.Mock() shard = mock.Mock() @@ -149,26 +142,41 @@ def test_add_timeout(cache): assert not cache.add(0, 0) -@setup_cache -def test_add_timeout_retry(cache): - shards = mock.Mock() - shard = mock.Mock() - add_func = mock.Mock() +def stress_add(cache, limit, results): + total = 0 + for num in range(limit): + if cache.add(num, num, retry=True): + total += 1 + # Stop one thread from running ahead of others. + time.sleep(0.001) + results.append(total) - shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.add = add_func - add_func.side_effect = [dc.Timeout, True] - with mock.patch.object(cache, '_shards', shards): - assert cache.add(0, 0, retry=True) +def test_add_concurrent(): + with dc.FanoutCache(shards=1) as cache: + results = co.deque() + limit = 1000 + + threads = [ + threading.Thread(target=stress_add, args=(cache, limit, results)) + for _ in range(16) + ] + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + assert sum(results) == limit + cache.check() + shutil.rmtree(cache.directory, ignore_errors=True) -@setup_cache def test_incr(cache): cache.incr('key', delta=3) == 3 -@setup_cache def test_incr_timeout(cache): shards = mock.Mock() shard = mock.Mock() @@ -182,23 +190,21 @@ def test_incr_timeout(cache): assert cache.incr('key', 1) is None -@setup_cache -def test_incr_timeout_retry(cache): +def test_decr(cache): + cache.decr('key', delta=2) == -2 + + +def test_decr_timeout(cache): shards = mock.Mock() shard = mock.Mock() - incr_func = mock.Mock() + decr_func = mock.Mock() shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.incr = incr_func - incr_func.side_effect = [dc.Timeout, 1] + shard.decr = decr_func + decr_func.side_effect = dc.Timeout with mock.patch.object(cache, '_shards', shards): - assert cache.incr('key', retry=True) == 1 - - -@setup_cache -def test_decr(cache): - cache.decr('key', delta=2) == -2 + assert cache.decr('key', 1) is None def stress_incr(cache, limit): @@ -208,10 +214,10 @@ def stress_incr(cache, limit): def test_incr_concurrent(): - count = 16 - limit = 500 + with dc.FanoutCache(shards=1, timeout=0.001) as cache: + count = 16 + limit = 50 - with dc.FanoutCache('tmp', timeout=0.001) as cache: threads = [ threading.Thread(target=stress_incr, args=(cache, limit)) for _ in range(count) @@ -223,44 +229,72 @@ def test_incr_concurrent(): for thread in threads: thread.join() - with dc.FanoutCache('tmp') as cache: assert cache.get(b'key') == count * limit cache.check() + shutil.rmtree(cache.directory, ignore_errors=True) + + +def test_getsetdel(cache): + values = [ + (None, False), + ((None,) * 2**10, False), + 
(1234, False), + (2**512, False), + (56.78, False), + ('hello', False), + ('hello' * 2**10, False), + (b'world', False), + (b'world' * 2**10, False), + (io.BytesIO(b'world' * 2**10), True), + ] + + for key, (value, file_like) in enumerate(values): + assert cache.set(key, value, read=file_like) + + assert len(cache) == len(values) + + for key, (value, file_like) in enumerate(values): + if file_like: + assert cache[key] == value.getvalue() + else: + assert cache[key] == value - shutil.rmtree('tmp', ignore_errors=True) + for key, _ in enumerate(values): + del cache[key] + assert len(cache) == 0 -@setup_cache -def test_get_timeout(cache): - cache.set(0, 0) + for value, (key, _) in enumerate(values): + cache[key] = value - shards = mock.Mock() - shard = mock.Mock() - get_func = mock.Mock() + assert len(cache) == len(values) - shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.get = get_func - get_func.side_effect = dc.Timeout + for value, (key, _) in enumerate(values): + assert cache[key] == value - with mock.patch.object(cache, '_shards', shards): - assert cache.get(0) is None + for _, (key, _) in enumerate(values): + del cache[key] + assert len(cache) == 0 + + cache.check() + + +def test_get_timeout(cache): + cache.set(0, 0) -@setup_cache -def test_get_timeout_retry(cache): shards = mock.Mock() shard = mock.Mock() get_func = mock.Mock() shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) shard.get = get_func - get_func.side_effect = [dc.Timeout, 0] + get_func.side_effect = dc.Timeout with mock.patch.object(cache, '_shards', shards): - assert cache.get(0, retry=True) == 0 + assert cache.get(0) is None -@setup_cache def test_pop(cache): for num in range(100): cache[num] = num @@ -269,7 +303,6 @@ def test_pop(cache): assert cache.pop(num) == num -@setup_cache def test_pop_timeout(cache): shards = mock.Mock() shard = mock.Mock() @@ -283,76 +316,30 @@ def test_pop_timeout(cache): assert cache.pop(0) is None -@setup_cache -def test_pop_timeout_retry(cache): - shards = mock.Mock() - shard = mock.Mock() - pop_func = mock.Mock() - - shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.pop = pop_func - pop_func.side_effect = [dc.Timeout, 0] - - with mock.patch.object(cache, '_shards', shards): - assert cache.pop(0, retry=True) == 0 - - -@setup_cache def test_delete_timeout(cache): shards = mock.Mock() shard = mock.Mock() delete_func = mock.Mock() shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.__delitem__ = delete_func + shard.delete = delete_func delete_func.side_effect = dc.Timeout with mock.patch.object(cache, '_shards', shards): assert not cache.delete(0) -@setup_cache -def test_delete_timeout_retry(cache): - shards = mock.Mock() - shard = mock.Mock() - delete_func = mock.Mock() - - shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.__delitem__ = delete_func - delete_func.side_effect = [dc.Timeout, True] - - with mock.patch.object(cache, '_shards', shards): - assert cache.delete(0, retry=True) - - -@setup_cache def test_delitem(cache): cache[0] = 0 assert cache[0] == 0 del cache[0] -@setup_cache -@nt.raises(KeyError) def test_delitem_keyerror(cache): - del cache[0] - - -@setup_cache -def test_delitem_timeout(cache): - shards = mock.Mock() - shard = mock.Mock() - delete_func = mock.Mock() - - shards.__getitem__ = mock.Mock(side_effect=lambda key: shard) - shard.__delitem__ = delete_func - delete_func.side_effect = [dc.Timeout, True] - - with mock.patch.object(cache, '_shards', shards): + with 
pytest.raises(KeyError): del cache[0] -@setup_cache def test_tag_index(cache): assert cache.tag_index == 0 cache.create_tag_index() @@ -361,32 +348,28 @@ def test_tag_index(cache): assert cache.tag_index == 0 -@setup_cache def test_read(cache): - cache.set(0, b'abcd' * 2 ** 20) + cache.set(0, b'abcd' * 2**20) with cache.read(0) as reader: assert reader is not None -@nt.raises(KeyError) -@setup_cache def test_read_keyerror(cache): - with cache.read(0) as reader: - pass + with pytest.raises(KeyError): + with cache.read(0): + pass -@nt.raises(KeyError) -@setup_cache def test_getitem_keyerror(cache): - cache[0] + with pytest.raises(KeyError): + cache[0] -@setup_cache def test_expire(cache): cache.reset('cull_limit', 0) for value in range(100): - cache.set(value, value, expire=0) + cache.set(value, value, expire=1e-9) assert len(cache) == 100 @@ -396,7 +379,6 @@ def test_expire(cache): assert cache.expire() == 100 -@setup_cache def test_evict(cache): colors = ('red', 'blue', 'yellow') @@ -409,7 +391,6 @@ def test_evict(cache): assert len(cache.check()) == 0 -@setup_cache def test_size_limit_with_files(cache): shards = 8 cache.reset('cull_limit', 0) @@ -425,7 +406,6 @@ def test_size_limit_with_files(cache): assert (cache.volume() // shards) <= size_limit -@setup_cache def test_size_limit_with_database(cache): shards = 8 cache.reset('cull_limit', 0) @@ -442,7 +422,6 @@ def test_size_limit_with_database(cache): assert (cache.volume() // shards) <= size_limit -@setup_cache def test_clear(cache): for value in range(100): cache[value] = value @@ -452,7 +431,6 @@ def test_clear(cache): assert len(cache.check()) == 0 -@setup_cache def test_remove_timeout(cache): shard = mock.Mock() clear = mock.Mock() @@ -464,7 +442,6 @@ def test_remove_timeout(cache): assert cache.clear() == 5 -@setup_cache def test_reset_timeout(cache): shard = mock.Mock() reset = mock.Mock() @@ -476,7 +453,6 @@ def test_reset_timeout(cache): assert cache.reset('blah', 1) == 0 -@setup_cache def test_stats(cache): for value in range(100): cache[value] = value @@ -502,20 +478,17 @@ def test_stats(cache): assert len(cache.check()) == 0 -@setup_cache def test_volume(cache): volume = sum(shard.volume() for shard in cache._shards) assert volume == cache.volume() -@setup_cache def test_iter(cache): for num in range(100): cache[num] = num assert set(cache) == set(range(100)) -@setup_cache def test_iter_expire(cache): """Test iteration with expiration. 
@@ -524,14 +497,13 @@ def test_iter_expire(cache): """ cache.reset('cull_limit', 0) for num in range(100): - cache.set(num, num, expire=0) + cache.set(num, num, expire=1e-9) time.sleep(0.1) assert set(cache) == set(range(100)) cache.expire() assert set(cache) == set() -@setup_cache def test_reversed(cache): for num in range(100): cache[num] = num @@ -539,7 +511,6 @@ def test_reversed(cache): assert list(cache) == list(reversed(reverse)) -@setup_cache def test_pickle(cache): for num, val in enumerate('abcde'): cache[val] = num @@ -551,7 +522,6 @@ def test_pickle(cache): assert other[key] == cache[key] -@setup_cache def test_memoize(cache): count = 1000 @@ -589,7 +559,7 @@ def fibrec(num): def test_copy(): - cache_dir1 = op.join('tmp', 'foo') + cache_dir1 = tempfile.mkdtemp() with dc.FanoutCache(cache_dir1) as cache1: for count in range(10): @@ -598,7 +568,8 @@ def test_copy(): for count in range(10, 20): cache1[count] = str(count) * int(1e5) - cache_dir2 = op.join('tmp', 'bar') + cache_dir2 = tempfile.mkdtemp() + shutil.rmtree(cache_dir2) shutil.copytree(cache_dir1, cache_dir2) with dc.FanoutCache(cache_dir2) as cache2: @@ -608,7 +579,8 @@ def test_copy(): for count in range(10, 20): assert cache2[count] == str(count) * int(1e5) - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache_dir1, ignore_errors=True) + shutil.rmtree(cache_dir2, ignore_errors=True) def run(command): @@ -628,8 +600,8 @@ def test_rsync(): return # No rsync installed. Skip test. rsync_args = ['rsync', '-a', '--checksum', '--delete', '--stats'] - cache_dir1 = op.join('tmp', 'foo') + os.sep - cache_dir2 = op.join('tmp', 'bar') + os.sep + cache_dir1 = tempfile.mkdtemp() + os.sep + cache_dir2 = tempfile.mkdtemp() + os.sep # Store some items in cache_dir1. @@ -681,7 +653,8 @@ def test_rsync(): for count in range(300, 400): assert cache1[count] == str(count) * int(1e5) - shutil.rmtree('tmp', ignore_errors=True) + shutil.rmtree(cache_dir1, ignore_errors=True) + shutil.rmtree(cache_dir2, ignore_errors=True) class SHA256FilenameDisk(dc.Disk): @@ -692,26 +665,21 @@ def filename(self, key=dc.UNKNOWN, value=dc.UNKNOWN): def test_custom_filename_disk(): - with dc.FanoutCache('tmp', disk=SHA256FilenameDisk) as cache: + with dc.FanoutCache(disk=SHA256FilenameDisk) as cache: for count in range(100, 200): key = str(count).encode('ascii') cache[key] = str(count) * int(1e5) - disk = SHA256FilenameDisk('tmp') + disk = SHA256FilenameDisk(cache.directory) for count in range(100, 200): key = str(count).encode('ascii') subdir = '%03d' % (disk.hash(key) % 8) filename = hashlib.sha256(key).hexdigest()[:32] - full_path = op.join('tmp', subdir, filename) + full_path = op.join(cache.directory, subdir, filename) with open(full_path) as reader: content = reader.read() assert content == str(count) * int(1e5) - shutil.rmtree('tmp', ignore_errors=True) - - -if __name__ == '__main__': - import nose - nose.runmodule() + shutil.rmtree(cache.directory, ignore_errors=True) diff --git a/tests/test_index.py b/tests/test_index.py index 423c3c9..742daf3 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -1,11 +1,10 @@ -"Test diskcache.persistent.Index." 
+"""Test diskcache.persistent.Index.""" -import functools as ft -import mock -import nose.tools as nt import pickle import shutil -import sys +import tempfile + +import pytest import diskcache as dc @@ -17,21 +16,15 @@ def rmdir(directory): pass -def setup_index(func): - @ft.wraps(func) - def wrapper(): - index = dc.Index() - try: - func(index) - except Exception: - rmdir(index.directory) - raise - - return wrapper +@pytest.fixture +def index(): + index = dc.Index() + yield index + rmdir(index.directory) def test_init(): - directory = '/tmp/diskcache/index' + directory = tempfile.mkdtemp() mapping = {'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1} index = dc.Index(None, mapping) @@ -64,7 +57,6 @@ def test_init(): assert index == mapping -@setup_index def test_getsetdel(index): letters = 'abcde' assert len(index) == 0 @@ -81,34 +73,6 @@ def test_getsetdel(index): assert len(index) == 0 -@setup_index -def test_get_timeout(index): - cache = mock.MagicMock() - cache.__getitem__.side_effect = [dc.Timeout, 0] - - with mock.patch.object(index, '_cache', cache): - assert index[0] == 0 - - -@setup_index -def test_set_timeout(index): - cache = mock.MagicMock() - cache.__setitem__.side_effect = [dc.Timeout, None] - - with mock.patch.object(index, '_cache', cache): - index[0] = 0 - - -@setup_index -def test_del_timeout(index): - cache = mock.MagicMock() - cache.__delitem__.side_effect = [dc.Timeout, None] - - with mock.patch.object(index, '_cache', cache): - del index[0] - - -@setup_index def test_pop(index): letters = 'abcde' assert len(index) == 0 @@ -124,22 +88,11 @@ def test_pop(index): assert len(index) == 0 -@nt.raises(KeyError) -@setup_index def test_pop_keyerror(index): - index.pop('a') - - -@setup_index -def test_pop_timeout(index): - cache = mock.MagicMock() - cache.pop.side_effect = [dc.Timeout, 1] + with pytest.raises(KeyError): + index.pop('a') - with mock.patch.object(index, '_cache', cache): - assert index.pop(0) == 1 - -@setup_index def test_popitem(index): letters = 'abcde' @@ -152,44 +105,16 @@ def test_popitem(index): assert len(index) == 2 -@nt.raises(KeyError) -@setup_index def test_popitem_keyerror(index): - index.popitem() - - -@setup_index -def test_popitem_timeout(index): - cache = mock.MagicMock() - cache.__reversed__ = mock.Mock() - cache.__reversed__.side_effect = [iter([0]), iter([0])] - cache.pop.side_effect = [dc.Timeout, 1] + with pytest.raises(KeyError): + index.popitem() - with mock.patch.object(index, '_cache', cache): - value = index.popitem() - assert value == (0, 1) - -@setup_index def test_setdefault(index): assert index.setdefault('a', 0) == 0 assert index.setdefault('a', 1) == 0 -@setup_index -def test_setdefault_timeout(index): - cache = mock.MagicMock() - cache.__getitem__ = mock.Mock() - cache.__getitem__.side_effect = [KeyError, 0] - cache.add = mock.Mock() - cache.add.side_effect = [dc.Timeout, 0] - - with mock.patch.object(index, '_cache', cache): - value = index.setdefault('a', 0) - assert value == 0 - - -@setup_index def test_iter(index): letters = 'abcde' @@ -200,7 +125,6 @@ def test_iter(index): assert index[key] == num -@setup_index def test_reversed(index): letters = 'abcde' @@ -211,7 +135,6 @@ def test_reversed(index): assert index[key] == (len(letters) - num - 1) -@setup_index def test_state(index): mapping = {'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1} index.update(mapping) @@ -221,49 +144,41 @@ def test_state(index): assert values == mapping -@setup_index -def test_push_timeout(index): - cache = mock.MagicMock() - cache.push.side_effect = [dc.Timeout, None] 
- - with mock.patch.object(index, '_cache', cache): - index.push(0) +def test_memoize(index): + count = 1000 + def fibiter(num): + alpha, beta = 0, 1 -@setup_index -def test_pull_timeout(index): - cache = mock.MagicMock() - cache.pull.side_effect = [dc.Timeout, None] + for _ in range(num): + alpha, beta = beta, alpha + beta - with mock.patch.object(index, '_cache', cache): - index.pull(0) + return alpha + @index.memoize() + def fibrec(num): + if num == 0: + return 0 + elif num == 1: + return 1 + else: + return fibrec(num - 1) + fibrec(num - 2) -@setup_index -def test_clear_timeout(index): - cache = mock.MagicMock() - cache.clear.side_effect = [dc.Timeout, None] + index._cache.stats(enable=True) - with mock.patch.object(index, '_cache', cache): - index.clear() + for value in range(count): + assert fibrec(value) == fibiter(value) + hits1, misses1 = index._cache.stats() -if sys.hexversion < 0x03000000: - @setup_index - def test_itervalues_timeout(index): - cache = mock.MagicMock() - cache.__iter__.side_effect = [iter([0, 1, 2])] - cache.__getitem__.side_effect = [dc.Timeout, KeyError, 1, 2] + for value in range(count): + assert fibrec(value) == fibiter(value) - with mock.patch.object(index, '_cache', cache): - assert list(index.itervalues()) == [1, 2] + hits2, misses2 = index._cache.stats() + assert hits2 == (hits1 + count) + assert misses2 == misses1 - @setup_index - def test_iteritems_timeout(index): - cache = mock.MagicMock() - cache.__iter__.side_effect = [iter([0, 1, 2])] - cache.__getitem__.side_effect = [dc.Timeout, KeyError, 1, 2] - with mock.patch.object(index, '_cache', cache): - assert list(index.iteritems()) == [(1, 1), (2, 2)] +def test_repr(index): + assert repr(index).startswith('Index(') diff --git a/tests/test_recipes.py b/tests/test_recipes.py new file mode 100644 index 0000000..ae74459 --- /dev/null +++ b/tests/test_recipes.py @@ -0,0 +1,106 @@ +"""Test diskcache.recipes.""" + +import shutil +import threading +import time + +import pytest + +import diskcache as dc + + +@pytest.fixture +def cache(): + with dc.Cache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) + + +def test_averager(cache): + nums = dc.Averager(cache, 'nums') + for i in range(10): + nums.add(i) + assert nums.get() == 4.5 + assert nums.pop() == 4.5 + for i in range(20): + nums.add(i) + assert nums.get() == 9.5 + assert nums.pop() == 9.5 + + +def test_lock(cache): + state = {'num': 0} + lock = dc.Lock(cache, 'demo') + + def worker(): + state['num'] += 1 + with lock: + assert lock.locked() + state['num'] += 1 + time.sleep(0.1) + + with lock: + thread = threading.Thread(target=worker) + thread.start() + time.sleep(0.1) + assert state['num'] == 1 + thread.join() + assert state['num'] == 2 + + +def test_rlock(cache): + state = {'num': 0} + rlock = dc.RLock(cache, 'demo') + + def worker(): + state['num'] += 1 + with rlock: + with rlock: + state['num'] += 1 + time.sleep(0.1) + + with rlock: + thread = threading.Thread(target=worker) + thread.start() + time.sleep(0.1) + assert state['num'] == 1 + thread.join() + assert state['num'] == 2 + + +def test_semaphore(cache): + state = {'num': 0} + semaphore = dc.BoundedSemaphore(cache, 'demo', value=3) + + def worker(): + state['num'] += 1 + with semaphore: + state['num'] += 1 + time.sleep(0.1) + + semaphore.acquire() + semaphore.acquire() + with semaphore: + thread = threading.Thread(target=worker) + thread.start() + time.sleep(0.1) + assert state['num'] == 1 + thread.join() + assert state['num'] == 2 + semaphore.release() + 
semaphore.release() + + +def test_memoize_stampede(cache): + state = {'num': 0} + + @dc.memoize_stampede(cache, 0.1) + def worker(num): + time.sleep(0.01) + state['num'] += 1 + return num + + start = time.time() + while (time.time() - start) < 1: + worker(100) + assert state['num'] > 0 diff --git a/tests/timings_core_p1.txt b/tests/timings_core_p1.txt index 7d6394d..f2b6951 100644 --- a/tests/timings_core_p1.txt +++ b/tests/timings_core_p1.txt @@ -4,10 +4,10 @@ Timings for diskcache.Cache ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 88966 9705 12.159us 17.166us 28.849us 174.999us 1.206s - set 9021 0 68.903us 93.937us 188.112us 10.297ms 875.907ms - delete 1012 104 47.207us 66.042us 128.031us 7.160ms 89.599ms - Total 98999 2.171s + get 89115 8714 19.073us 25.749us 32.902us 115.395us 1.800s + set 8941 0 114.918us 137.091us 241.041us 4.946ms 1.242s + delete 943 111 87.976us 149.202us 219.824us 4.795ms 120.738ms + Total 98999 3.163s ========= ========= ========= ========= ========= ========= ========= ========= @@ -16,10 +16,10 @@ Timings for diskcache.FanoutCache(shards=4, timeout=1.0) ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 88966 9705 15.020us 20.027us 33.855us 437.021us 1.425s - set 9021 0 71.049us 100.136us 203.133us 9.186ms 892.262ms - delete 1012 104 48.161us 69.141us 129.952us 5.216ms 87.294ms - Total 98999 2.405s + get 89115 8714 21.935us 27.180us 36.001us 129.938us 2.028s + set 8941 0 118.017us 170.946us 270.844us 5.129ms 1.307s + delete 943 111 91.791us 153.780us 231.981us 4.883ms 119.732ms + Total 98999 3.455s ========= ========= ========= ========= ========= ========= ========= ========= @@ -28,10 +28,10 @@ Timings for diskcache.FanoutCache(shards=8, timeout=0.010) ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 88966 9705 15.020us 20.027us 34.094us 627.995us 1.420s - set 9021 0 72.956us 100.851us 203.133us 9.623ms 927.824ms - delete 1012 104 50.783us 72.002us 132.084us 8.396ms 78.898ms - Total 98999 2.426s + get 89115 8714 20.981us 27.180us 35.286us 128.031us 2.023s + set 8941 0 116.825us 175.953us 269.175us 5.248ms 1.367s + delete 943 111 91.791us 158.787us 235.345us 4.634ms 106.991ms + Total 98999 3.496s ========= ========= ========= ========= ========= ========= ========= ========= @@ -40,10 +40,10 @@ Timings for pylibmc.Client ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 88966 9705 25.988us 29.802us 41.008us 139.952us 2.388s - set 9021 0 27.895us 30.994us 40.054us 97.990us 254.248ms - delete 1012 104 25.988us 29.087us 38.147us 89.169us 27.159ms - Total 98999 2.669s + get 89115 8714 42.915us 62.227us 79.155us 166.178us 3.826s + set 8941 0 44.107us 63.896us 82.254us 121.832us 396.247ms + delete 943 111 41.962us 60.797us 75.817us 92.983us 39.570ms + Total 98999 4.262s ========= ========= ========= ========= ========= ========= ========= ========= @@ -52,8 +52,8 @@ Timings for redis.StrictRedis 
------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 88966 9705 44.107us 54.121us 73.910us 204.086us 4.125s - set 9021 0 45.061us 56.028us 75.102us 237.942us 427.197ms - delete 1012 104 44.107us 54.836us 72.002us 126.839us 46.771ms - Total 98999 4.599s + get 89115 8714 86.069us 101.089us 144.005us 805.140us 7.722s + set 8941 0 89.169us 104.189us 146.866us 408.173us 800.963ms + delete 943 111 86.069us 99.182us 149.012us 327.826us 80.976ms + Total 98999 8.604s ========= ========= ========= ========= ========= ========= ========= ========= diff --git a/tests/timings_core_p8.txt b/tests/timings_core_p8.txt index c7c5713..bb4ed47 100644 --- a/tests/timings_core_p8.txt +++ b/tests/timings_core_p8.txt @@ -4,10 +4,10 @@ Timings for diskcache.Cache ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 71214 15.974us 23.127us 40.054us 4.953ms 12.349s - set 71530 0 94.891us 1.328ms 21.307ms 1.846s 131.728s - delete 7916 807 65.088us 1.278ms 19.610ms 1.244s 13.811s - Total 791992 157.888s + get 712612 69147 20.027us 28.133us 45.061us 2.792ms 15.838s + set 71464 0 129.700us 1.388ms 35.831ms 1.342s 160.708s + delete 7916 769 97.036us 1.340ms 21.605ms 837.003ms 13.551s + Total 791992 194.943s ========= ========= ========= ========= ========= ========= ========= ========= @@ -16,10 +16,10 @@ Timings for diskcache.FanoutCache(shards=4, timeout=1.0) ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 71623 19.073us 35.048us 59.843us 12.980ms 16.849s - set 71530 0 108.004us 1.313ms 9.176ms 333.361ms 50.821s - delete 7916 767 73.195us 1.264ms 9.033ms 108.232ms 4.964s - Total 791992 72.634s + get 712612 70432 27.895us 48.876us 77.963us 12.945ms 25.443s + set 71464 0 176.907us 1.416ms 9.385ms 183.997ms 65.606s + delete 7916 747 132.084us 1.354ms 9.272ms 86.189ms 6.576s + Total 791992 98.248s ========= ========= ========= ========= ========= ========= ========= ========= @@ -28,10 +28,10 @@ Timings for diskcache.FanoutCache(shards=8, timeout=0.010) ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 71106 25.034us 47.922us 101.089us 9.015ms 22.336s - set 71530 39 134.945us 1.324ms 5.763ms 16.027ms 33.347s - delete 7916 775 88.930us 1.267ms 5.017ms 13.732ms 3.308s - Total 791992 58.991s + get 712612 69622 41.962us 71.049us 96.083us 16.896ms 36.145s + set 71464 39 257.969us 1.456ms 7.132ms 19.774ms 46.160s + delete 7916 773 190.020us 1.377ms 5.927ms 12.939ms 4.442s + Total 791992 86.799s ========= ========= ========= ========= ========= ========= ========= ========= @@ -40,10 +40,10 @@ Timings for pylibmc.Client ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 72043 83.923us 107.050us 123.978us 617.027us 61.824s - set 71530 0 84.877us 108.004us 124.931us 312.090us 6.283s - delete 7916 796 82.970us 105.858us 123.024us 
288.963us 680.970ms - Total 791992 68.788s + get 712612 70517 95.844us 113.010us 131.130us 604.153us 69.024s + set 71464 0 97.036us 114.918us 136.137us 608.921us 7.024s + delete 7916 817 94.891us 112.057us 132.084us 604.153us 760.844ms + Total 791992 76.809s ========= ========= ========= ========= ========= ========= ========= ========= @@ -52,8 +52,8 @@ Timings for redis.StrictRedis ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 72093 138.044us 169.039us 212.908us 151.121ms 101.197s - set 71530 0 138.998us 169.992us 216.007us 1.200ms 10.173s - delete 7916 752 136.137us 167.847us 211.954us 1.059ms 1.106s - Total 791992 112.476s + get 712612 70540 187.874us 244.141us 305.891us 1.416ms 138.516s + set 71464 0 192.881us 249.147us 311.136us 1.363ms 14.246s + delete 7916 825 185.966us 242.949us 305.176us 519.276us 1.525s + Total 791992 154.287s ========= ========= ========= ========= ========= ========= ========= ========= diff --git a/tests/timings_djangocache.txt b/tests/timings_djangocache.txt index c80e180..1a24c07 100644 --- a/tests/timings_djangocache.txt +++ b/tests/timings_djangocache.txt @@ -4,10 +4,10 @@ Timings for locmem ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 140750 36.001us 57.936us 60.081us 10.202ms 28.962s - set 71530 0 36.955us 39.101us 45.061us 2.784ms 2.709s - delete 7916 0 32.902us 35.048us 37.193us 1.524ms 265.399ms - Total 791992 31.936s + get 712770 141094 34.809us 47.922us 55.075us 15.140ms 26.159s + set 71249 0 38.862us 41.008us 59.843us 8.094ms 2.725s + delete 7973 0 32.902us 35.048us 51.260us 2.963ms 257.951ms + Total 791992 29.142s ========= ========= ========= ========= ========= ========= ========= ========= @@ -16,10 +16,10 @@ Timings for memcached ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 69185 87.023us 99.182us 110.865us 576.973us 61.758s - set 71530 0 89.169us 102.043us 114.202us 259.876us 6.395s - delete 7916 0 85.115us 97.990us 108.957us 201.941us 672.212ms - Total 791992 68.825s + get 712770 71873 102.043us 118.017us 182.867us 2.054ms 73.453s + set 71249 0 104.904us 123.978us 182.152us 836.849us 7.592s + delete 7973 0 98.944us 114.918us 176.191us 473.261us 795.398ms + Total 791992 81.841s ========= ========= ========= ========= ========= ========= ========= ========= @@ -28,10 +28,10 @@ Timings for redis ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 69526 160.933us 195.980us 239.134us 1.365ms 116.816s - set 71530 0 166.178us 200.987us 242.949us 587.940us 12.143s - delete 7916 791 143.051us 177.860us 217.915us 330.925us 1.165s - Total 791992 130.124s + get 712770 71694 214.100us 267.982us 358.820us 1.556ms 155.709s + set 71249 0 230.789us 284.195us 377.178us 1.462ms 16.764s + delete 7973 790 195.742us 251.770us 345.945us 1.105ms 1.596s + Total 791992 174.069s ========= ========= ========= ========= ========= ========= ========= ========= @@ -40,10 +40,10 @@ Timings for diskcache 
------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712546 69509 33.855us 56.982us 79.155us 11.908ms 30.078s - set 71530 0 178.814us 1.355ms 5.032ms 26.620ms 34.461s - delete 7916 0 107.050us 1.280ms 4.738ms 17.217ms 3.303s - Total 791992 67.842s + get 712770 70909 55.075us 82.016us 106.096us 36.816ms 44.088s + set 71249 0 303.984us 1.489ms 6.499ms 39.687ms 49.088s + delete 7973 0 228.882us 1.409ms 5.769ms 24.750ms 4.755s + Total 791992 98.465s ========= ========= ========= ========= ========= ========= ========= ========= @@ -52,8 +52,8 @@ Timings for filebased ------------------------------------------------------------------------------- Action Count Miss Median P90 P99 Max Total ========= ========= ========= ========= ========= ========= ========= ========= - get 712749 103843 112.772us 193.119us 423.908us 18.428ms 92.428s - set 71431 0 8.893ms 11.742ms 14.790ms 44.201ms 646.879s - delete 7812 0 223.875us 389.099us 679.016us 15.058ms 1.940s - Total 791992 741.247s + get 712792 112290 114.918us 161.171us 444.889us 61.068ms 94.438s + set 71268 0 11.289ms 13.278ms 16.653ms 108.282ms 809.448s + delete 7977 0 432.014us 675.917us 5.785ms 55.249ms 3.652s + Total 791992 907.537s ========= ========= ========= ========= ========= ========= ========= ========= diff --git a/tests/utils.py b/tests/utils.py index f2370da..38e5d33 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import os import subprocess as sp @@ -20,7 +18,6 @@ def percentile(sequence, percent): def secs(value): units = ['s ', 'ms', 'us', 'ns'] - pos = 0 if value is None: return ' 0.000ns' @@ -35,7 +32,7 @@ def secs(value): def run(*args): - "Run command, print output, and return output." + """Run command, print output, and return output.""" print('utils$', *args) result = sp.check_output(args) print(result) @@ -43,7 +40,7 @@ def run(*args): def mount_ramdisk(size, path): - "Mount RAM disk at `path` with `size` in bytes." + """Mount RAM disk at `path` with `size` in bytes.""" sectors = size / 512 os.makedirs(path) @@ -56,30 +53,12 @@ def mount_ramdisk(size, path): def unmount_ramdisk(dev_path, path): - "Unmount RAM disk with `dev_path` and `path`." 
+ """Unmount RAM disk with `dev_path` and `path`.""" run('umount', path) run('diskutil', 'eject', dev_path) run('rm', '-r', path) -def retry(sql, query): - pause = 0.001 - error = sqlite3.OperationalError - - for _ in range(int(LIMITS[u'timeout'] / pause)): - try: - sql(query).fetchone() - except sqlite3.OperationalError as exc: - error = exc - time.sleep(pause) - else: - break - else: - raise error - - del error - - def display(name, timings): cols = ('Action', 'Count', 'Miss', 'Median', 'P90', 'P99', 'Max', 'Total') template = ' '.join(['%9s'] * len(cols)) @@ -98,16 +77,19 @@ def display(name, timings): len_total += len(values) sum_total += sum(values) - print(template % ( - action, - len(values), - len(timings.get(action + '-miss', [])), - secs(percentile(values, 0.5)), - secs(percentile(values, 0.9)), - secs(percentile(values, 0.99)), - secs(percentile(values, 1.0)), - secs(sum(values)), - )) + print( + template + % ( + action, + len(values), + len(timings.get(action + '-miss', [])), + secs(percentile(values, 0.5)), + secs(percentile(values, 0.9)), + secs(percentile(values, 0.99)), + secs(percentile(values, 1.0)), + secs(sum(values)), + ) + ) totals = ('Total', len_total, '', '', '', '', '', secs(sum_total)) print(template % totals) diff --git a/tox.ini b/tox.ini index 9b13a3d..e7217a7 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,101 @@ [tox] -envlist=py27,py34,py35,py36 +envlist=bluecheck,doc8,docs,isortcheck,flake8,mypy,pylint,rstcheck,py38,py39,py310,py311 +skip_missing_interpreters=True + [testenv] -deps=nose - mock - django>=1.11,<1.12 -commands=nosetests +commands=pytest +deps= + django==4.2.* + pytest + pytest-cov + pytest-django + pytest-xdist +setenv= + DJANGO_SETTINGS_MODULE=tests.settings + PYTHONPATH={toxinidir} + +[testenv:blue] +commands=blue {toxinidir}/setup.py {toxinidir}/diskcache {toxinidir}/tests +deps=blue + +[testenv:bluecheck] +commands=blue --check {toxinidir}/setup.py {toxinidir}/diskcache {toxinidir}/tests +deps=blue + +[testenv:doc8] +commands=doc8 docs --ignore-path docs/_build +deps=doc8 + +[testenv:docs] +allowlist_externals=make +changedir=docs +commands=make html +deps= + django==4.2.* + sphinx + +[testenv:flake8] +commands=flake8 {toxinidir}/setup.py {toxinidir}/diskcache {toxinidir}/tests +deps=flake8 + +[testenv:isort] +commands=isort {toxinidir}/setup.py {toxinidir}/diskcache {toxinidir}/tests +deps=isort + +[testenv:isortcheck] +commands=isort --check {toxinidir}/setup.py {toxinidir}/diskcache {toxinidir}/tests +deps=isort + +[testenv:mypy] +commands=mypy {toxinidir}/diskcache +deps=mypy + +[testenv:pylint] +commands=pylint {toxinidir}/diskcache +deps= + django==4.2.* + pylint + +[testenv:rstcheck] +commands=rstcheck {toxinidir}/README.rst +deps=rstcheck + +[testenv:uploaddocs] +allowlist_externals=rsync +changedir=docs +commands= + rsync --rsync-path 'sudo -u herokuish rsync' -azP --stats --delete \ + _build/html/ \ + grantjenks:/srv/www/grantjenks.com/public/docs/diskcache/ + +[isort] +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +use_parentheses = True +ensure_newline_before_comments = True +line_length = 79 + +[pytest] +addopts= + -n auto + --cov-branch + --cov-fail-under=98 + --cov-report=term-missing + --cov=diskcache + --doctest-glob="*.rst" + --ignore docs/case-study-web-crawler.rst + --ignore docs/sf-python-2017-meetup-talk.rst + --ignore tests/benchmark_core.py + --ignore tests/benchmark_djangocache.py + --ignore tests/benchmark_glob.py + --ignore tests/issue_85.py + --ignore tests/plot.py + +[doc8] +# 
+ignore=D000
+
+[flake8]
+exclude=tests/test_djangocache.py
+extend-ignore=E203
+max-line-length=120