source: ftputil/lrucache.py

Last change on this file was 1848:7cca1ad2711f, checked in by Stefan Schwarzer <sschwarzer@…>, 6 months ago
Format source code with Black: format code in `ftputil` and `test` with Black. Don't format files in the `private` or other directories (or at least not yet).
File size: 9.7 KB
Line 
1# lrucache.py -- a simple LRU (Least-Recently-Used) cache class
2
3# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
4#
5# Copyright 2009-2018 Stefan Schwarzer <sschwarzer@sschwarzer.net>
6# (some changes to the original version)
7
8# Licensed under the Academic Free License 2.1
9
10# Licensed for ftputil under the revised BSD license
11# with permission by the author, Evan Prodromou. Many
12# thanks, Evan! :-)
13#
14# The original file is available at
15# http://pypi.python.org/pypi/lrucache/0.2 .
16
17# arch-tag: LRU cache main module
18
19"""a simple LRU (Least-Recently-Used) cache module
20
21This module provides very simple LRU (Least-Recently-Used) cache
22functionality.
23
24An *in-memory cache* is useful for storing the results of an
25'expensive' process (one that takes a lot of time or resources) for
26later re-use. Typical examples are accessing data from the filesystem,
27a database, or a network location. If you know you'll need to re-read
28the data again, it can help to keep it in a cache.
29
30You *can* use a Python dictionary as a cache for some purposes.
31However, if the results you're caching are large, or you have a lot of
32possible results, this can be impractical memory-wise.
33
34An *LRU cache*, on the other hand, only keeps _some_ of the results in
35memory, which keeps you from overusing resources. The cache is bounded
36by a maximum size; if you try to add more values to the cache, it will
37automatically discard the values that you haven't read or written to
38in the longest time. In other words, the least-recently-used items are
39discarded. [1]_
40
.. [1] 'Discarded' here means 'removed from the cache'.
42
43"""
44
import time


# The suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version.
__version__ = "0.2-15"
# Names exported by `from lrucache import *`.
__all__ = ["CacheKeyError", "LRUCache", "DEFAULT_SIZE"]
# Docstrings in this module use reStructuredText markup.
__docformat__ = "reStructuredText en"
53
# Default size of a new LRUCache object, if no 'size' argument is given.
DEFAULT_SIZE = 16


class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it.
    """


class LRUCache:
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32)  # new cache
        cache['foo'] = get_file_contents('foo')  # or whatever

        if 'foo' in cache:  # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print(cache.size)  # maximum size

        print(len(cache))  # 0 <= len(cache) <= cache.size

        cache.size = 10  # auto-shrink on size assignment

        for i in range(50):  # note: larger than cache size
            cache[i] = i

        if 0 not in cache:
            print('Zero was discarded.')

        if 42 in cache:
            del cache[42]  # manual deletion

        for j in cache:  # iterate (in LRU order)
            print(j, cache[j])  # iterator produces keys, not values
    """

    class _Node:
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            # Last access time of the cached value.
            self.atime = timestamp
            # Last modification time; identical to `atime` at creation.
            self.mtime = self.atime
            # Monotonically increasing integer; a larger value means
            # more recently used. Used to order nodes for eviction.
            self._sort_key = sort_key

        def __lt__(self, other):
            # `min` and `list.sort` only need `__lt__`. Seems to be
            # preferred over `__cmp__`, at least in newer Python
            # versions. Uses only around 60 % of the time with respect
            # to `__cmp__`.
            # pylint: disable=protected-access
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % (
                self.__class__,
                self.key,
                self.obj,
                time.asctime(time.localtime(self.atime)),
            )

    def __init__(self, size=DEFAULT_SIZE):
        """Init the `LRUCache` object. `size` is the initial
        _maximum_ size of the cache. The size can be changed by
        setting the `size` attribute.
        """
        self.clear()
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded. This assignment implicitly checks the size value
        # via `__setattr__`.
        self.size = size

    def clear(self):
        """Clear the cache, removing all elements.

        The `size` attribute of the cache isn't modified.
        """
        # pylint: disable=attribute-defined-outside-init
        # List of `_Node` objects. Despite the name, this is a plain
        # list; LRU eviction uses `min`/`sort` on the nodes' sort keys.
        self.__heap = []
        # Mapping from cache key to its `_Node`.
        self.__dict = {}
        # Source of monotonically increasing sort keys, see `_sort_key`.
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        `time.time()` and `time.clock()` don't guarantee this in a
        platform-independent way.

        See http://ftputil.sschwarzer.net/trac/ticket/32 for details.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        """Return _current_ number of cache entries.

        This may be different from the value of the `size`
        attribute.
        """
        return len(self.__heap)

    def __contains__(self, key):
        """Return `True` if the item denoted by `key` is in the cache."""
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store item `obj` in the cache under the key `key`.

        If the number of elements after the addition of a new key
        would exceed the maximum cache size, the least recently
        used item in the cache is "forgotten".
        """
        heap = self.__heap
        dict_ = self.__dict
        if key in dict_:
            node = dict_[key]
            # Update node object in-place.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            # pylint: disable=protected-access
            node._sort_key = self._sort_key()
        else:
            # The size of the heap can be at most the value of
            # `self.size` because `__setattr__` decreases the cache
            # size if the new size value is smaller; so we don't
            # need a loop _here_.
            if len(heap) == self.size:
                # Evict the least-recently-used node (smallest sort key).
                lru_node = min(heap)
                heap.remove(lru_node)
                del dict_[lru_node.key]
            node = self._Node(key, obj, time.time(), self._sort_key())
            dict_[key] = node
            heap.append(node)

    def __getitem__(self, key):
        """Return the item stored under `key` key.

        Reading the item counts as a "use" and makes it the
        most-recently-used entry.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        # Update node object in-place.
        node.atime = time.time()
        # pylint: disable=protected-access
        node._sort_key = self._sort_key()
        return node.obj

    def __delitem__(self, key):
        """Delete the item stored under `key` key and return it.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        self.__heap.remove(node)
        del self.__dict[key]
        return node.obj

    def __iter__(self):
        """Iterate over the cache, from the least to the most
        recently accessed item.
        """
        self.__heap.sort()
        for node in self.__heap:
            yield node.key

    def __setattr__(self, name, value):
        """If the name of the attribute is "size", set the
        _maximum_ size of the cache to the supplied value and
        discard the least-recently-used items if the cache holds
        more items than the new size allows.

        Raise a `TypeError` if the new size isn't an integer and a
        `ValueError` if it isn't positive. In both cases the cache
        is left unchanged.
        """
        if name == "size":
            # Validate _before_ storing the attribute. The previous
            # version stored first and validated afterwards, so a
            # rejected assignment left an invalid value in `size`,
            # corrupting later eviction checks in `__setitem__`.
            if not isinstance(value, int):
                raise TypeError("cache size (%r) must be an integer" % value)
            if value <= 0:
                raise ValueError("cache size (%d) must be positive" % value)
        object.__setattr__(self, name, value)
        if name != "size":
            return
        # Automagically shrink heap on resize.
        heap = self.__heap
        dict_ = self.__dict
        # Do we need to remove anything at all?
        if len(heap) <= self.size:
            return
        # Remove enough least-recently-used nodes to reach the new size.
        heap.sort()
        node_count_to_remove = len(heap) - self.size
        for node in heap[:node_count_to_remove]:
            del dict_[node.key]
        del heap[:node_count_to_remove]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        "stale", such as caching file or network resource contents.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        return node.mtime
286
287
if __name__ == "__main__":

    def _demo():
        """Exercise the cache and print its intermediate states."""
        cache = LRUCache(25)
        print(cache)
        for item in range(50):
            cache[item] = str(item)
        print(cache)
        if 46 in cache:
            del cache[46]
        print(cache)
        cache.size = 10
        print(cache)
        cache[46] = "46"
        print(cache)
        print(len(cache))
        for key in cache:
            print(key)
        print(cache)
        print(cache.mtime(46))
        for key in cache:
            print(key)

    _demo()
Note: See TracBrowser for help on using the repository browser.