source: lrucache.py @ 789:b3672c4b4ce5

Last changed in 789:b3672c4b4ce5, checked in by Stefan Schwarzer <sschwarzer@…>, 12 years ago: "Distinguish version number parts by Evan and by the ftputil project."
# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

15"""a simple LRU (Least-Recently-Used) cache module
16
17This module provides very simple LRU (Least-Recently-Used) cache
18functionality.
19
20An *in-memory cache* is useful for storing the results of an
21'expensive' process (one that takes a lot of time or resources) for
22later re-use. Typical examples are accessing data from the filesystem,
23a database, or a network location. If you know you'll need to re-read
24the data again, it can help to keep it in a cache.
25
26You *can* use a Python dictionary as a cache for some purposes.
27However, if the results you're caching are large, or you have a lot of
28possible results, this can be impractical memory-wise.
29
30An *LRU cache*, on the other hand, only keeps _some_ of the results in
31memory, which keeps you from overusing resources. The cache is bounded
32by a maximum size; if you try to add more values to the cache, it will
33automatically discard the values that you haven't read or written to
34in the longest time. In other words, the least-recently-used items are
35discarded. [1]_
36
37.. [1]: 'Discarded' here means 'removed from the cache'.
38
39"""
40
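# A minimal illustration of the eviction rule described above (a sketch;
# LRUCache is defined further down in this module):
#
#     cache = LRUCache(size=2)
#     cache['a'] = 1
#     cache['b'] = 2
#     cache['c'] = 3   # cache full: 'a', the least-recently-used key,
#                      # is discarded
#     'a' in cache     # -> False
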
from __future__ import generators

import time
from heapq import heappush, heappop, heapify

# The suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version.
__version__ = "0.2-1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the
    existence of a cache record before reading or deleting it."""
    pass

class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32) # new cache
        cache['foo'] = get_file_contents('foo') # or whatever

        if 'foo' in cache: # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size # Maximum size

        print len(cache) # 0 <= len(cache) <= cache.size

        cache.size = 10 # Auto-shrink on size assignment

        for i in range(50): # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42] # Manual deletion

        for j in cache:   # iterate (in LRU order)
            print j, cache[j] # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __cmp__(self, other):
            return cmp(self._sort_key, other._sort_key)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj,
                    time.asctime(time.localtime(self.atime)))

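    # Note: __Node objects compare by _sort_key (see __cmp__ above), so
    # the min-heap maintained by the methods below always keeps the
    # least-recently-used node at its root.
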
    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments: reject non-integers before comparing them
        # against the lower bound.
        if type(size) is not type(0):
            raise TypeError(size)
        elif size <= 0:
            raise ValueError(size)
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        # Assigning 'size' goes through __setattr__ below, which also
        # auto-shrinks the heap whenever the size is reduced later.
        self.size = size
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return key in self.__dict

    def __setitem__(self, key, obj):
        if key in self.__dict:
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

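    # Note: __setitem__ and __getitem__ above restore the heap invariant
    # with heapify(), an O(n) operation on every access. That is fine for
    # the small cache sizes this module targets.
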
    def __delitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            # list.remove relies on __Node.__cmp__; _sort_key values are
            # unique, so only this exact node compares equal
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        # pop keys off a copy of the heap, yielding them in
        # LRU-to-most-recent order without mutating the cache
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

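# A staleness-check sketch using mtime() (MAX_AGE and refresh() are
# hypothetical names, not part of this module):
#
#     if key in cache and time.time() - cache.mtime(key) > MAX_AGE:
#         cache[key] = refresh(key)   # overwrite the stale entry
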
if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c
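
    # A few extra checks of the LRU semantics (a minimal sketch, not part
    # of the original demo):
    cache = LRUCache(2)
    cache['a'] = 1
    cache['b'] = 2
    cache['a']              # touch 'a', so 'b' is now least recently used
    cache['c'] = 3          # inserting 'c' evicts 'b'
    assert 'b' not in cache
    assert 'a' in cache and 'c' in cache
    print 'LRU eviction works as described.'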