source: ftputil/lrucache.py @ 1736:4c45dfcbf33a

# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
#
# Copyright 2009-2018 Stefan Schwarzer <sschwarzer@sschwarzer.net>
# (some changes to the original version)

# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

"""a simple LRU (Least-Recently-Used) cache module

This module provides very simple LRU (Least-Recently-Used) cache
functionality.

An *in-memory cache* is useful for storing the results of an
'expensive' process (one that takes a lot of time or resources) for
later re-use. Typical examples are accessing data from the filesystem,
a database, or a network location. If you know you'll need to re-read
the data again, it can help to keep it in a cache.

You *can* use a Python dictionary as a cache for some purposes.
However, if the results you're caching are large, or you have a lot of
possible results, this can be impractical memory-wise.

An *LRU cache*, on the other hand, keeps only _some_ of the results in
memory, which keeps you from overusing resources. The cache is bounded
by a maximum size; if you try to add more values to the cache, it will
automatically discard the values that you haven't read or written for
the longest time. In other words, the least-recently-used items are
discarded. [1]_

.. [1] 'Discarded' here means 'removed from the cache'.

"""

import time


# The suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version.
__version__ = "0.2-15"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

# Default size of a new LRUCache object, if no 'size' argument is given.
DEFAULT_SIZE = 16


class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it.
    """
    pass


class LRUCache:
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32)  # new cache
        cache['foo'] = get_file_contents('foo')  # or whatever

        if 'foo' in cache:  # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print(cache.size)  # Maximum size

        print(len(cache))  # 0 <= len(cache) <= cache.size

        cache.size = 10  # Auto-shrink on size assignment

        for i in range(50):  # note: larger than cache size
            cache[i] = i

        if 0 not in cache:
            print('Zero was discarded.')

        if 42 in cache:
            del cache[42]  # Manual deletion

        for j in cache:  # iterate (in LRU order)
            print(j, cache[j])  # iterator produces keys, not values
    """

    class _Node:
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __lt__(self, other):
            # `__lt__` is used instead of `__cmp__` because newer Python
            # versions (Python 3 in particular) no longer support
            # `__cmp__`. Sorting with `__lt__` also takes only around
            # 60 % of the time of the `__cmp__` version.
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % (
                self.__class__,
                self.key,
                self.obj,
                time.asctime(time.localtime(self.atime)),
            )


    def __init__(self, size=DEFAULT_SIZE):
        """Init the `LRUCache` object. `size` is the initial
        _maximum_ size of the cache. The size can be changed by
        setting the `size` attribute.
        """
        self.clear()
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded. This assignment implicitly checks the size value.
        self.size = size

    def clear(self):
        """Clear the cache, removing all elements.

        The `size` attribute of the cache isn't modified.
        """
        # pylint: disable=attribute-defined-outside-init
        self.__heap = []
        self.__dict = {}
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        `time.time()` and `time.clock()` don't guarantee this in a
        platform-independent way.

        See http://ftputil.sschwarzer.net/trac/ticket/32 for details.
        """
        self.__counter += 1
        return self.__counter
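
    # An equivalent way to express the counter above, kept as a sketch
    # only (`itertools.count` gives the same monotonic guarantee):
    #
    #     import itertools
    #     self.__counter = itertools.count(1)   # in `clear`
    #     return next(self.__counter)           # in `_sort_key`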

    def __len__(self):
        """Return the _current_ number of cache entries.

        This may be different from the value of the `size`
        attribute.
        """
        return len(self.__heap)

    def __contains__(self, key):
        """Return `True` if the item denoted by `key` is in the cache."""
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store item `obj` in the cache under the key `key`.

        If the number of elements after the addition of a new key
        would exceed the maximum cache size, the least recently
        used item in the cache is "forgotten".
        """
        heap = self.__heap
        dict_ = self.__dict
        if key in dict_:
            node = dict_[key]
            # Update the node object in-place.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
        else:
            # The size of the heap can be at most the value of
            # `self.size` because `__setattr__` decreases the cache
            # size if the new size value is smaller; so we don't
            # need a loop _here_.
            if len(heap) == self.size:
                lru_node = min(heap)
                heap.remove(lru_node)
                del dict_[lru_node.key]
            node = self._Node(key, obj, time.time(), self._sort_key())
            dict_[key] = node
            heap.append(node)
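
    # Sketch of the eviction branch above, with illustrative values:
    #
    #     cache = LRUCache(size=1)
    #     cache["a"] = 1
    #     cache["b"] = 2   # `len(heap) == self.size`, so "a" is evicted
    #     assert "a" not in cache and "b" in cache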

    def __getitem__(self, key):
        """Return the item stored under the key `key`.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        # Update the node object in-place.
        node.atime = time.time()
        node._sort_key = self._sort_key()
        return node.obj
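
    # Note that a successful lookup refreshes the entry's position in
    # the LRU order (sketch):
    #
    #     cache = LRUCache(size=2)
    #     cache["a"] = 1
    #     cache["b"] = 2
    #     cache["a"]       # "a" is now the most recently used entry
    #     cache["c"] = 3   # evicts "b", not "a"
    #     assert "a" in cache and "b" not in cache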

    def __delitem__(self, key):
        """Delete the item stored under the key `key` and return it.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        self.__heap.remove(node)
        del self.__dict[key]
        return node.obj

    def __iter__(self):
        """Iterate over the cache keys, from the least to the most
        recently accessed item.
        """
        self.__heap.sort()
        for node in self.__heap:
            yield node.key

    def __setattr__(self, name, value):
        """If the name of the attribute is "size", set the
        _maximum_ size of the cache to the supplied value.
        """
        if name == 'size':
            # Validate the new size before setting it, so that a failed
            # assignment doesn't leave an invalid size in place.
            if not isinstance(value, int):
                raise TypeError("cache size (%r) must be an integer" % value)
            if value <= 0:
                raise ValueError("cache size (%d) must be positive" % value)
        object.__setattr__(self, name, value)
        # Automagically shrink the heap on resize.
        if name == 'size':
            heap = self.__heap
            dict_ = self.__dict
            # Do we need to remove anything at all?
            if len(heap) <= self.size:
                return
            # Remove enough nodes to reach the new size.
            heap.sort()
            node_count_to_remove = len(heap) - self.size
            for node in heap[:node_count_to_remove]:
                del dict_[node.key]
            del heap[:node_count_to_remove]
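
    # Sketch of the auto-shrink behavior on `size` assignment:
    #
    #     cache = LRUCache(size=4)
    #     for i in range(4):
    #         cache[i] = i
    #     cache.size = 2   # keeps only the two most recently used entries
    #     assert len(cache) == 2 and 3 in cache and 0 not in cache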

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time of the cache record for `key`.

        This may be useful for cache instances where the stored values can
        get "stale", such as caching file or network resource contents.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        return node.mtime
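
    # A possible use of `mtime` for staleness checks (sketch; `MAX_AGE`
    # and `fetch` are hypothetical):
    #
    #     if key in cache and time.time() - cache.mtime(key) <= MAX_AGE:
    #         value = cache[key]
    #     else:
    #         value = fetch(key)
    #         cache[key] = value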


if __name__ == "__main__":
    cache = LRUCache(25)
    print(cache)
    for i in range(50):
        cache[i] = str(i)
    print(cache)
    if 46 in cache:
        del cache[46]
    print(cache)
    cache.size = 10
    print(cache)
    cache[46] = '46'
    print(cache)
    print(len(cache))
    for c in cache:
        print(c)
    print(cache)
    print(cache.mtime(46))
    for c in cache:
        print(c)