source: ftputil/lrucache.py @ 1544:1a72b2809d48

Last change on this file since 1544:1a72b2809d48 was 1544:1a72b2809d48, checked in by Stefan Schwarzer <sschwarzer@…>, 5 years ago
Fixed typos.
File size: 9.7 KB
Line 
1# lrucache.py -- a simple LRU (Least-Recently-Used) cache class
2
3# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
4#
5# Copyright 2009-2013 Stefan Schwarzer <sschwarzer@sschwarzer.net>
6# (some changes to the original version)
7
8# Licensed under the Academic Free License 2.1
9
10# Licensed for ftputil under the revised BSD license
11# with permission by the author, Evan Prodromou. Many
12# thanks, Evan! :-)
13#
14# The original file is available at
15# http://pypi.python.org/pypi/lrucache/0.2 .
16
17# arch-tag: LRU cache main module
18
19"""a simple LRU (Least-Recently-Used) cache module
20
21This module provides very simple LRU (Least-Recently-Used) cache
22functionality.
23
24An *in-memory cache* is useful for storing the results of an
25'expensive' process (one that takes a lot of time or resources) for
26later re-use. Typical examples are accessing data from the filesystem,
27a database, or a network location. If you know you'll need to re-read
28the data again, it can help to keep it in a cache.
29
30You *can* use a Python dictionary as a cache for some purposes.
31However, if the results you're caching are large, or you have a lot of
32possible results, this can be impractical memory-wise.
33
34An *LRU cache*, on the other hand, only keeps _some_ of the results in
35memory, which keeps you from overusing resources. The cache is bounded
36by a maximum size; if you try to add more values to the cache, it will
37automatically discard the values that you haven't read or written to
38in the longest time. In other words, the least-recently-used items are
39discarded. [1]_
40
41.. [1] 'Discarded' here means 'removed from the cache'.
42
43"""
44
45from __future__ import unicode_literals
46
47import time
48
49
# The suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version.
__version__ = "0.2-12"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

# Default size of a new LRUCache object, if no 'size' argument is given.
DEFAULT_SIZE = 16

# For Python 2/3 compatibility: on Python 2, integers may be `int` or
# `long`; on Python 3, merely referencing `long` raises `NameError`.
try:
    int_types = (int, long)
except NameError:
    int_types = (int,)
65
66
class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    This error is raised when a cache record is accessed which no longer
    exists (or never did). To avoid it, you may want to check for the
    existence of a cache record before reading or deleting it.
    """
75
76
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

    cache = LRUCache(32) # new cache
    cache['foo'] = get_file_contents('foo') # or whatever

    if 'foo' in cache: # if it's still in cache...
        # use cached version
        contents = cache['foo']
    else:
        # recalculate
        contents = get_file_contents('foo')
        # store in cache for next time
        cache['foo'] = contents

    print(cache.size) # maximum size

    print(len(cache)) # 0 <= len(cache) <= cache.size

    cache.size = 10 # auto-shrink on size assignment

    for i in range(50): # note: larger than cache size
        cache[i] = i

    if 0 not in cache:
        print('Zero was discarded.')

    if 42 in cache:
        del cache[42] # manual deletion

    for j in cache:   # iterate (in LRU order)
        print(j, cache[j]) # iterator produces keys, not values
    """

    class _Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            # Time of last access (read or write) and time of last
            # modification (write only).
            self.atime = timestamp
            self.mtime = self.atime
            # Monotonically increasing integer; later accesses get larger
            # values, so sorting nodes by it yields LRU-to-MRU order.
            self._sort_key = sort_key

        def __lt__(self, other):
            # Seems to be preferred over `__cmp__`, at least in newer
            # Python versions. Uses only around 60 % of the time
            # with respect to `__cmp__`.
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj, \
                    time.asctime(time.localtime(self.atime)))


    def __init__(self, size=DEFAULT_SIZE):
        """Init the `LRUCache` object. `size` is the initial
        _maximum_ size of the cache. The size can be changed by
        setting the `size` attribute.
        """
        self.clear()
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded. This assignment implicitly checks the size value
        # (see `__setattr__`).
        self.size = size

    def clear(self):
        """Clear the cache, removing all elements.

        The `size` attribute of the cache isn't modified.
        """
        # pylint: disable=attribute-defined-outside-init
        # `__heap` is a plain list of `_Node`s; it's only sorted on
        # demand (see `__iter__` and `__setattr__`), not maintained as
        # a `heapq` heap.
        self.__heap = []
        # Maps key -> `_Node` for O(1) lookup.
        self.__dict = {}
        # Source of unique, monotonically increasing sort keys.
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        `time.time()` and `time.clock()` don't guarantee this in a
        platform-independent way.

        See http://ftputil.sschwarzer.net/trac/ticket/32 for details.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        """Return _current_ number of cache entries.

        This may be different from the value of the `size`
        attribute.
        """
        return len(self.__heap)

    def __contains__(self, key):
        """Return `True` if the item denoted by `key` is in the cache."""
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store item `obj` in the cache under the key `key`.

        If the number of elements after the addition of a new key
        would exceed the maximum cache size, the least recently
        used item in the cache is "forgotten".
        """
        heap = self.__heap
        dict_ = self.__dict
        if key in dict_:
            node = dict_[key]
            # Update node object in-place.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
        else:
            # The size of the heap can be at most the value of
            # `self.size` because `__setattr__` decreases the cache
            # size if the new size value is smaller; so we don't
            # need a loop _here_. (`>=` instead of `==` is purely a
            # defensive measure against an inconsistent state.)
            if len(heap) >= self.size:
                lru_node = min(heap)
                heap.remove(lru_node)
                del dict_[lru_node.key]
            node = self._Node(key, obj, time.time(), self._sort_key())
            dict_[key] = node
            heap.append(node)

    def __getitem__(self, key):
        """Return the item stored under `key` key.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # Update node object in-place; reading refreshes the access
            # time and the LRU position, but not the modification time.
            node.atime = time.time()
            node._sort_key = self._sort_key()
            return node.obj

    def __delitem__(self, key):
        """Delete the item stored under `key` key and return the
        stored object.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            self.__heap.remove(node)
            del self.__dict[key]
            return node.obj

    def __iter__(self):
        """Iterate over the cache, from the least to the most
        recently accessed item.
        """
        self.__heap.sort()
        for node in self.__heap:
            yield node.key

    def __setattr__(self, name, value):
        """If the name of the attribute is "size", set the
        _maximum_ size of the cache to the supplied value.

        Raise a `TypeError` for non-integer sizes and a `ValueError`
        for sizes less than one.
        """
        if name == 'size':
            size = value
            # Validate the new size _before_ setting the attribute.
            # Previously the invalid value was stored first and the
            # exception raised afterwards, which left the cache with
            # an unusable `size` value.
            if not isinstance(size, int_types):
                raise TypeError("cache size (%r) must be an integer" % size)
            if size <= 0:
                raise ValueError("cache size (%d) must be positive" % size)
        object.__setattr__(self, name, value)
        # Automagically shrink heap on resize.
        if name == 'size':
            heap = self.__heap
            dict_ = self.__dict
            # Do we need to remove anything at all?
            if len(heap) <= self.size:
                return
            # Remove enough least-recently-used nodes to reach the
            # new size.
            heap.sort()
            node_count_to_remove = len(heap) - self.size
            for node in heap[:node_count_to_remove]:
                del dict_[node.key]
            del heap[:node_count_to_remove]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with
        key `key`.

        May be useful for cache instances where the stored values can get
        "stale", such as caching file or network resource contents.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime
290
if __name__ == "__main__":
    # Small interactive demo of the cache behavior; mirrors the
    # docstring example.
    cache = LRUCache(25)
    print(cache)
    for item in range(50):
        cache[item] = str(item)
    print(cache)
    if 46 in cache:
        del cache[46]
    print(cache)
    cache.size = 10
    print(cache)
    cache[46] = '46'
    print(cache)
    print(len(cache))
    for key in cache:
        print(key)
    print(cache)
    print(cache.mtime(46))
    for key in cache:
        print(key)
Note: See TracBrowser for help on using the repository browser.