source: ftputil/lrucache.py @ 1713:f146a1ea66aa

Last change on this file since 1713:f146a1ea66aa was 1713:f146a1ea66aa, checked in by Stefan Schwarzer <sschwarzer@…>, 12 months ago
Remove `__future__` imports With the switch to Python 3.x-only, the `__future__` imports are no longer needed. Update copyright years along with the `__future__` import removal.
File size: 9.7 KB
Line 
1# lrucache.py -- a simple LRU (Least-Recently-Used) cache class
2
3# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
4#
5# Copyright 2009-2013 Stefan Schwarzer <sschwarzer@sschwarzer.net>
6# (some changes to the original version)
7
8# Licensed under the Academic Free License 2.1
9
10# Licensed for ftputil under the revised BSD license
11# with permission by the author, Evan Prodromou. Many
12# thanks, Evan! :-)
13#
14# The original file is available at
15# http://pypi.python.org/pypi/lrucache/0.2 .
16
17# arch-tag: LRU cache main module
18
19"""a simple LRU (Least-Recently-Used) cache module
20
21This module provides very simple LRU (Least-Recently-Used) cache
22functionality.
23
24An *in-memory cache* is useful for storing the results of an
25'expensive' process (one that takes a lot of time or resources) for
26later re-use. Typical examples are accessing data from the filesystem,
27a database, or a network location. If you know you'll need to re-read
28the data again, it can help to keep it in a cache.
29
30You *can* use a Python dictionary as a cache for some purposes.
31However, if the results you're caching are large, or you have a lot of
32possible results, this can be impractical memory-wise.
33
34An *LRU cache*, on the other hand, only keeps _some_ of the results in
35memory, which keeps you from overusing resources. The cache is bounded
36by a maximum size; if you try to add more values to the cache, it will
37automatically discard the values that you haven't read or written to
38in the longest time. In other words, the least-recently-used items are
39discarded. [1]_
40
.. [1] 'Discarded' here means 'removed from the cache'.
42
43"""
44
45import time
46
47
# Version suffix after the hyphen marks the ftputil project's
# modifications relative to Evan Prodromou's original release.
__version__ = "0.2-13"
__all__ = ["CacheKeyError", "LRUCache", "DEFAULT_SIZE"]
__docformat__ = "reStructuredText en"

# Maximum size used for a new `LRUCache` when the caller passes no `size`.
DEFAULT_SIZE = 16
56
# This module is Python 3-only now (see the `print()` calls in the
# `__main__` section), so `long` no longer exists and the former
# `try: long ... except NameError` shim is dead code. The `int_types`
# name is kept (as a tuple, as before) for backward compatibility with
# any code that imports it.
int_types = (int,)
63
64
class CacheKeyError(KeyError):
    """Raised when a cache access fails.

    Reading or deleting a cache record that doesn't exist (anymore)
    raises this error. Callers who want to avoid it can test for the
    record with `in` before accessing or deleting it.
    """
73
74
class LRUCache:
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

    cache = LRUCache(32) # new cache
    cache['foo'] = get_file_contents('foo') # or whatever

    if 'foo' in cache: # if it's still in cache...
        # use cached version
        contents = cache['foo']
    else:
        # recalculate
        contents = get_file_contents('foo')
        # store in cache for next time
        cache['foo'] = contents

    print(cache.size) # Maximum size

    print(len(cache)) # 0 <= len(cache) <= cache.size

    cache.size = 10 # Auto-shrink on size assignment

    for i in range(50): # note: larger than cache size
        cache[i] = i

    if 0 not in cache: print('Zero was discarded.')

    if 42 in cache:
        del cache[42] # Manual deletion

    for j in cache:   # iterate (in LRU order)
        print(j, cache[j]) # iterator produces keys, not values
    """

    class _Node:
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            # Cached key/value pair.
            self.key = key
            self.obj = obj
            # Access and modification times (as returned by `time.time()`).
            self.atime = timestamp
            self.mtime = self.atime
            # Monotonically increasing counter value; nodes with smaller
            # sort keys were used less recently (see `LRUCache._sort_key`).
            self._sort_key = sort_key

        def __lt__(self, other):
            # Seems to be preferred over `__cmp__`, at least in newer
            # Python versions. Uses only around 60 % of the time
            # with respect to `__cmp__`.
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj, \
                    time.asctime(time.localtime(self.atime)))


    def __init__(self, size=DEFAULT_SIZE):
        """Init the `LRUCache` object. `size` is the initial
        _maximum_ size of the cache. The size can be changed by
        setting the `size` attribute.
        """
        self.clear()
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded. This assignment implicitly checks the size value
        # via `__setattr__`.
        self.size = size

    def clear(self):
        """Clear the cache, removing all elements.

        The `size` attribute of the cache isn't modified.
        """
        # pylint: disable=attribute-defined-outside-init
        # Despite its name, `__heap` is a plain list of `_Node`s that is
        # sorted only on demand (see `__iter__` and `__setattr__`), not
        # maintained as a binary heap.
        self.__heap = []
        # Maps keys to their `_Node` records for O(1) lookup.
        self.__dict = {}
        # Counter backing `_sort_key`; see that method for why a counter
        # is used instead of `time.time()`.
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        `time.time()` and `time.clock()` don't guarantee this in a
        platform-independent way.

        See http://ftputil.sschwarzer.net/trac/ticket/32 for details.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        """Return _current_ number of cache entries.

        This may be different from the value of the `size`
        attribute.
        """
        return len(self.__heap)

    def __contains__(self, key):
        """Return `True` if the item denoted by `key` is in the cache.

        Unlike `__getitem__`, this does _not_ refresh the item's
        last-access time.
        """
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store item `obj` in the cache under the key `key`.

        If the number of elements after the addition of a new key
        would exceed the maximum cache size, the least recently
        used item in the cache is "forgotten".
        """
        heap = self.__heap
        dict_ = self.__dict
        if key in dict_:
            node = dict_[key]
            # Update node object in-place.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
        else:
            # The size of the heap can be at most the value of
            # `self.size` because `__setattr__` decreases the cache
            # size if the new size value is smaller; so we don't
            # need a loop _here_.
            if len(heap) == self.size:
                # `min` finds the node with the smallest sort key,
                # i.e. the least-recently-used one.
                lru_node = min(heap)
                heap.remove(lru_node)
                del dict_[lru_node.key]
            node = self._Node(key, obj, time.time(), self._sort_key())
            dict_[key] = node
            heap.append(node)

    def __getitem__(self, key):
        """Return the item stored under `key` key.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # Update node object in-place; reading counts as a "use",
            # so refresh access time and LRU position.
            node.atime = time.time()
            node._sort_key = self._sort_key()
            return node.obj

    def __delitem__(self, key):
        """Delete the item stored under `key` key and return its value.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            self.__heap.remove(node)
            del self.__dict[key]
            return node.obj

    def __iter__(self):
        """Iterate over the cache, from the least to the most
        recently accessed item.
        """
        # Sorting in place is fine; the list order carries no meaning
        # between calls (see `clear`).
        self.__heap.sort()
        for node in self.__heap:
            yield node.key

    def __setattr__(self, name, value):
        """If the name of the attribute is "size", set the
        _maximum_ size of the cache to the supplied value.

        Raise `TypeError` if the new size isn't an integer and
        `ValueError` if it isn't positive. Shrinking the size
        discards the least-recently-used surplus items.
        """
        object.__setattr__(self, name, value)
        # Automagically shrink heap on resize.
        if name == 'size':
            size = value
            # Python 3-only code, so a plain `int` check is enough
            # (there is no separate `long` type anymore).
            if not isinstance(size, int):
                raise TypeError("cache size (%r) must be an integer" % size)
            if size <= 0:
                raise ValueError("cache size (%d) must be positive" % size)
            heap = self.__heap
            dict_ = self.__dict
            # Do we need to remove anything at all?
            if len(heap) <= self.size:
                return
            # Remove enough nodes to reach the new size.
            heap.sort()
            node_count_to_remove = len(heap) - self.size
            for node in heap[:node_count_to_remove]:
                del dict_[node.key]
            del heap[:node_count_to_remove]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        "stale", such as caching file or network resource contents.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime
287
288
if __name__ == "__main__":
    # Small demonstration / smoke test of the cache behavior.
    demo_cache = LRUCache(25)
    print(demo_cache)
    for number in range(50):
        demo_cache[number] = str(number)
    print(demo_cache)
    if 46 in demo_cache:
        del demo_cache[46]
    print(demo_cache)
    demo_cache.size = 10
    print(demo_cache)
    demo_cache[46] = '46'
    print(demo_cache)
    print(len(demo_cache))
    for key in demo_cache:
        print(key)
    print(demo_cache)
    print(demo_cache.mtime(46))
    for key in demo_cache:
        print(key)
Note: See TracBrowser for help on using the repository browser.