source: ftputil/lrucache.py @ 1741:0a9be69f9436

Last change on this file since 1741:0a9be69f9436 was 1741:0a9be69f9436, checked in by Stefan Schwarzer <sschwarzer@…>, 4 months ago
Suppress warning about protected access The classes `_Node` and `LRUCache` work closely together. It's ok to access the `_sort_key` attribute of (other) `_Node` instances.
File size: 9.7 KB
Line 
1# lrucache.py -- a simple LRU (Least-Recently-Used) cache class
2
3# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
4#
5# Copyright 2009-2018 Stefan Schwarzer <sschwarzer@sschwarzer.net>
6# (some changes to the original version)
7
8# Licensed under the Academic Free License 2.1
9
10# Licensed for ftputil under the revised BSD license
11# with permission by the author, Evan Prodromou. Many
12# thanks, Evan! :-)
13#
14# The original file is available at
15# http://pypi.python.org/pypi/lrucache/0.2 .
16
17# arch-tag: LRU cache main module
18
19"""a simple LRU (Least-Recently-Used) cache module
20
21This module provides very simple LRU (Least-Recently-Used) cache
22functionality.
23
24An *in-memory cache* is useful for storing the results of an
25'expensive' process (one that takes a lot of time or resources) for
26later re-use. Typical examples are accessing data from the filesystem,
27a database, or a network location. If you know you'll need to re-read
28the data again, it can help to keep it in a cache.
29
30You *can* use a Python dictionary as a cache for some purposes.
31However, if the results you're caching are large, or you have a lot of
32possible results, this can be impractical memory-wise.
33
34An *LRU cache*, on the other hand, only keeps _some_ of the results in
35memory, which keeps you from overusing resources. The cache is bounded
36by a maximum size; if you try to add more values to the cache, it will
37automatically discard the values that you haven't read or written to
38in the longest time. In other words, the least-recently-used items are
39discarded. [1]_
40
.. [1] 'Discarded' here means 'removed from the cache'.
42
43"""
44
45import time
46
47
# The suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version.
__version__ = "0.2-15"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

# Default size of a new LRUCache object, if no 'size' argument is given.
DEFAULT_SIZE = 16


class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it.
    """


class LRUCache:
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

    cache = LRUCache(32) # new cache
    cache['foo'] = get_file_contents('foo') # or whatever

    if 'foo' in cache: # if it's still in cache...
        # use cached version
        contents = cache['foo']
    else:
        # recalculate
        contents = get_file_contents('foo')
        # store in cache for next time
        cache['foo'] = contents

    print(cache.size) # Maximum size

    print(len(cache)) # 0 <= len(cache) <= cache.size

    cache.size = 10 # Auto-shrink on size assignment

    for i in range(50): # note: larger than cache size
        cache[i] = i

    if 0 not in cache: print('Zero was discarded.')

    if 42 in cache:
        del cache[42] # Manual deletion

    for j in cache:   # iterate (in LRU order)
        print(j, cache[j]) # iterator produces keys, not values
    """

    class _Node:
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            # Access time and modification time of the cached value.
            self.atime = timestamp
            self.mtime = self.atime
            # Monotonically increasing integer; a higher value means
            # "used more recently" (see `LRUCache._sort_key`).
            self._sort_key = sort_key

        def __lt__(self, other):
            # Defining only `__lt__` is sufficient for `min` and
            # `list.sort` and is noticeably faster than a full
            # `__cmp__`-style comparison.
            # pylint: disable=protected-access
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj,
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        """Init the `LRUCache` object. `size` is the initial
        _maximum_ size of the cache. The size can be changed by
        setting the `size` attribute.

        Raise `TypeError` if `size` isn't an integer, `ValueError`
        if it isn't positive (see `__setattr__`).
        """
        self.clear()
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded. This assignment implicitly checks the size value.
        self.size = size

    def clear(self):
        """Clear the cache, removing all elements.

        The `size` attribute of the cache isn't modified.
        """
        # pylint: disable=attribute-defined-outside-init
        # `__heap` and `__dict` hold the same `_Node` objects; the
        # dictionary gives O(1) key lookup, the list is sorted on
        # demand to find the least-recently-used nodes.
        self.__heap = []
        self.__dict = {}
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        `time.time()` and `time.clock()` don't guarantee this in a
        platform-independent way.

        See http://ftputil.sschwarzer.net/trac/ticket/32 for details.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        """Return _current_ number of cache entries.

        This may be different from the value of the `size`
        attribute.
        """
        return len(self.__heap)

    def __contains__(self, key):
        """Return `True` if the item denoted by `key` is in the cache."""
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store item `obj` in the cache under the key `key`.

        If the number of elements after the addition of a new key
        would exceed the maximum cache size, the least recently
        used item in the cache is "forgotten".
        """
        heap = self.__heap
        dict_ = self.__dict
        if key in dict_:
            node = dict_[key]
            # Update node object in-place.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            # pylint: disable=protected-access
            node._sort_key = self._sort_key()
        else:
            # The size of the heap can be at most the value of
            # `self.size` because `__setattr__` decreases the cache
            # size if the new size value is smaller; so we don't
            # need a loop _here_.
            if len(heap) == self.size:
                lru_node = min(heap)
                heap.remove(lru_node)
                del dict_[lru_node.key]
            node = self._Node(key, obj, time.time(), self._sort_key())
            dict_[key] = node
            heap.append(node)

    def __getitem__(self, key):
        """Return the item stored under `key` key.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        # Update the access time and recency marker in-place, but
        # not the modification time: reading doesn't change the value.
        node.atime = time.time()
        # pylint: disable=protected-access
        node._sort_key = self._sort_key()
        return node.obj

    def __delitem__(self, key):
        """Delete the item stored under `key` key and return it.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        self.__heap.remove(node)
        del self.__dict[key]
        return node.obj

    def __iter__(self):
        """Iterate over the cache, from the least to the most
        recently accessed item.
        """
        self.__heap.sort()
        for node in self.__heap:
            yield node.key

    def __setattr__(self, name, value):
        """If the name of the attribute is "size", set the
        _maximum_ size of the cache to the supplied value.

        Raise `TypeError` if the new size isn't an integer and
        `ValueError` if it isn't positive. In either case the
        previous size is left unchanged.
        """
        if name == 'size':
            # Validate _before_ assigning, so that a rejected
            # assignment doesn't leave an invalid size behind.
            if not isinstance(value, int):
                raise TypeError("cache size (%r) must be an integer" % value)
            if value <= 0:
                raise ValueError("cache size (%d) must be positive" % value)
        object.__setattr__(self, name, value)
        # Automagically shrink heap on resize.
        if name == 'size':
            heap = self.__heap
            dict_ = self.__dict
            # Do we need to remove anything at all?
            if len(heap) <= self.size:
                return
            # Remove enough least-recently-used nodes to reach the
            # new size.
            heap.sort()
            node_count_to_remove = len(heap) - self.size
            for node in heap[:node_count_to_remove]:
                del dict_[node.key]
            del heap[:node_count_to_remove]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        "stale", such as caching file or network resource contents.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        return node.mtime
283
284
if __name__ == "__main__":
    # Small self-demo: exercise the public cache operations and show
    # the intermediate cache states on stdout.
    demo_cache = LRUCache(25)
    print(demo_cache)
    # Insert more items than the cache can hold.
    for number in range(50):
        demo_cache[number] = str(number)
    print(demo_cache)
    # Manual deletion of a present key.
    if 46 in demo_cache:
        del demo_cache[46]
    print(demo_cache)
    # Shrink the cache; least-recently-used entries are discarded.
    demo_cache.size = 10
    print(demo_cache)
    demo_cache[46] = '46'
    print(demo_cache)
    print(len(demo_cache))
    # Iteration yields keys from least to most recently used.
    for key in demo_cache:
        print(key)
    print(demo_cache)
    print(demo_cache.mtime(46))
    for key in demo_cache:
        print(key)
Note: See TracBrowser for help on using the repository browser.