source: lrucache.py @ 788:b263a58e4a48

Last change on this file: 788:b263a58e4a48, checked in by Stefan Schwarzer <sschwarzer@…>, 12 years ago
Added comment for `_sort_key` method from Peter Stirling's patch.
# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

"""a simple LRU (Least-Recently-Used) cache module

This module provides very simple LRU (Least-Recently-Used) cache
functionality.

An *in-memory cache* is useful for storing the results of an
'expensive' process (one that takes a lot of time or resources) for
later re-use. Typical examples are accessing data from the filesystem,
a database, or a network location. If you know you'll need to re-read
the data later, it can help to keep it in a cache.

You *can* use a Python dictionary as a cache for some purposes.
However, if the results you're caching are large, or you have many
possible results, a dictionary can be impractical memory-wise.

An *LRU cache*, on the other hand, holds only *some* of the results in
memory, which keeps you from overusing resources. The cache is bounded
by a maximum size; if you try to add more values to the cache, it will
automatically discard the values that you haven't read or written for
the longest time. In other words, the least-recently-used items are
discarded. [1]_

.. [1] 'Discarded' here means 'removed from the cache'.

"""

from __future__ import generators
import time
from heapq import heappush, heappop, heapify

__version__ = "0.2.1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the
    existence of a cache record before reading or deleting it. Since
    CacheKeyError subclasses KeyError, code that catches KeyError will
    catch it as well."""
    pass

class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32) # new cache
        cache['foo'] = get_file_contents('foo') # or whatever

        if 'foo' in cache: # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size # Maximum size

        print len(cache) # 0 <= len(cache) <= cache.size

        cache.size = 10 # Auto-shrink on size assignment

        for i in range(50): # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42] # Manual deletion

        for j in cache: # iterate (in LRU order)
            print j, cache[j] # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __cmp__(self, other):
            # heap order follows the sort key, a monotonically
            # increasing counter
            return cmp(self._sort_key, other._sort_key)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj,
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments. Check the type first so that an argument of
        # the wrong type isn't reported as an invalid value.
        if not isinstance(size, int):
            raise TypeError(size)
        if size <= 0:
            raise ValueError(size)
        object.__init__(self)
        # heap of `__Node` objects, ordered by their sort keys
        self.__heap = []
        # mapping from cache keys to `__Node` objects
        self.__dict = {}
        self.size = size
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return key in self.__dict

    def __setitem__(self, key, obj):
        if key in self.__dict:
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            # the sort key changed, so restore the heap invariant
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place; reading a value counts
            # as a use
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        # pop from a copy of the heap so that iterating doesn't
        # change the access order of the cached items
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with `key`.

        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

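# The following helper is not part of the original module; it's a minimal
# sketch of how `mtime` might support refreshing 'stale' entries. `fetch`
# stands in for any expensive function of `key`, and `max_age` (in
# seconds) is an assumed freshness limit.
def fetch_with_max_age(cache, key, fetch, max_age):
    """Return a cached value for `key`, recomputing it via `fetch(key)`
    if it is missing from `cache` or older than `max_age` seconds."""
    if key in cache and time.time() - cache.mtime(key) <= max_age:
        return cache[key]
    value = fetch(key)
    cache[key] = value
    return value
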
if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c
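    # An additional demonstration, not in the original demo: the
    # check-then-compute pattern from the class docstring. `expensive`
    # is a stand-in for a costly computation.
    def expensive(n):
        return n * n
    for n in (3, 3, 7):
        if n in cache:
            print 'cache hit:', n, '->', cache[n]
        else:
            cache[n] = expensive(n)
            print 'computed:', n, '->', cache[n]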