root/trunk/lrucache.py

Revision 763, 6.8 kB (checked in by schwa, 3 weeks ago)
Added svn:mime-type text/x-python to the Python files which didn't
have the MIME type set.
  • Property svn:mime-type set to text/x-python
  • Property svn:eol-style set to native
Line 
1 # lrucache.py -- a simple LRU (Least-Recently-Used) cache class
2
3 # Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
4 # Licensed under the Academic Free License 2.1
5
6 # Licensed for ftputil under the revised BSD license
7 # with permission by the author, Evan Prodromou. Many
8 # thanks, Evan! :-)
9 #
10 # The original file is available at
11 # http://pypi.python.org/pypi/lrucache/0.2 .
12
13 # arch-tag: LRU cache main module
14
15 """a simple LRU (Least-Recently-Used) cache module
16
17 This module provides very simple LRU (Least-Recently-Used) cache
18 functionality.
19
20 An *in-memory cache* is useful for storing the results of an
21 'expensive' process (one that takes a lot of time or resources) for
22 later re-use. Typical examples are accessing data from the filesystem,
23 a database, or a network location. If you know you'll need to re-read
24 the data again, it can help to keep it in a cache.
25
26 You *can* use a Python dictionary as a cache for some purposes.
27 However, if the results you're caching are large, or you have a lot of
28 possible results, this can be impractical memory-wise.
29
30 An *LRU cache*, on the other hand, only keeps _some_ of the results in
31 memory, which keeps you from overusing resources. The cache is bounded
32 by a maximum size; if you try to add more values to the cache, it will
33 automatically discard the values that you haven't read or written to
34 in the longest time. In other words, the least-recently-used items are
35 discarded. [1]_
36
37 .. [1]: 'Discarded' here means 'removed from the cache'.
38
39 """
40
41 from __future__ import generators
42 import time
43 from heapq import heappush, heappop, heapify
44
45 __version__ = "0.2"
46 __all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
47 __docformat__ = 'reStructuredText en'
48
49 DEFAULT_SIZE = 16
50 """Default size of a new LRUCache object, if no 'size' argument is given."""
51
52 class CacheKeyError(KeyError):
53     """Error raised when cache requests fail
54
55     When a cache record is accessed which no longer exists (or never did),
56     this error is raised. To avoid it, you may want to check for the existence
57     of a cache record before reading or deleting it."""
58     pass
59
60 class LRUCache(object):
61     """Least-Recently-Used (LRU) cache.
62
63     Instances of this class provide a least-recently-used (LRU) cache. They
64     emulate a Python mapping type. You can use an LRU cache more or less like
65     a Python dictionary, with the exception that objects you put into the
66     cache may be discarded before you take them out.
67
68     Some example usage::
69
70     cache = LRUCache(32) # new cache
71     cache['foo'] = get_file_contents('foo') # or whatever
72
73     if 'foo' in cache: # if it's still in cache...
74         # use cached version
75         contents = cache['foo']
76     else:
77         # recalculate
78         contents = get_file_contents('foo')
79         # store in cache for next time
80         cache['foo'] = contents
81
82     print cache.size # Maximum size
83
84     print len(cache) # 0 <= len(cache) <= cache.size
85
86     cache.size = 10 # Auto-shrink on size assignment
87
88     for i in range(50): # note: larger than cache size
89         cache[i] = i
90
91     if 0 not in cache: print 'Zero was discarded.'
92
93     if 42 in cache:
94         del cache[42] # Manual deletion
95
96     for j in cache:   # iterate (in LRU order)
97         print j, cache[j] # iterator produces keys, not values
98     """
99
100     class __Node(object):
101         """Record of a cached value. Not for public consumption."""
102
103         def __init__(self, key, obj, timestamp):
104             object.__init__(self)
105             self.key = key
106             self.obj = obj
107             self.atime = timestamp
108             self.mtime = self.atime
109
110         def __cmp__(self, other):
111             return cmp(self.atime, other.atime)
112
113         def __repr__(self):
114             return "<%s %s => %s (%s)>" % \
115                    (self.__class__, self.key, self.obj, \
116                     time.asctime(time.localtime(self.atime)))
117
118     def __init__(self, size=DEFAULT_SIZE):
119         # Check arguments
120         if size <= 0:
121             raise ValueError, size
122         elif type(size) is not type(0):
123             raise TypeError, size
124         object.__init__(self)
125         self.__heap = []
126         self.__dict = {}
127         self.size = size
128         """Maximum size of the cache.
129         If more than 'size' elements are added to the cache,
130         the least-recently-used ones will be discarded."""
131
132     def __len__(self):
133         return len(self.__heap)
134
135     def __contains__(self, key):
136         return self.__dict.has_key(key)
137
138     def __setitem__(self, key, obj):
139         if self.__dict.has_key(key):
140             node = self.__dict[key]
141             node.obj = obj
142             node.atime = time.time()
143             node.mtime = node.atime
144             heapify(self.__heap)
145         else:
146             # size may have been reset, so we loop
147             while len(self.__heap) >= self.size:
148                 lru = heappop(self.__heap)
149                 del self.__dict[lru.key]
150             node = self.__Node(key, obj, time.time())
151             self.__dict[key] = node
152             heappush(self.__heap, node)
153
154     def __getitem__(self, key):
155         if not self.__dict.has_key(key):
156             raise CacheKeyError(key)
157         else:
158             node = self.__dict[key]
159             node.atime = time.time()
160             heapify(self.__heap)
161             return node.obj
162
163     def __delitem__(self, key):
164         if not self.__dict.has_key(key):
165             raise CacheKeyError(key)
166         else:
167             node = self.__dict[key]
168             del self.__dict[key]
169             self.__heap.remove(node)
170             heapify(self.__heap)
171             return node.obj
172
173     def __iter__(self):
174         copy = self.__heap[:]
175         while len(copy) > 0:
176             node = heappop(copy)
177             yield node.key
178         raise StopIteration
179
180     def __setattr__(self, name, value):
181         object.__setattr__(self, name, value)
182         # automagically shrink heap on resize
183         if name == 'size':
184             while len(self.__heap) > value:
185                 lru = heappop(self.__heap)
186                 del self.__dict[lru.key]
187
188     def __repr__(self):
189         return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))
190
191     def mtime(self, key):
192         """Return the last modification time for the cache record with key.
193         May be useful for cache instances where the stored values can get
194         'stale', such as caching file or network resource contents."""
195         if not self.__dict.has_key(key):
196             raise CacheKeyError(key)
197         else:
198             node = self.__dict[key]
199             return node.mtime
200
201 if __name__ == "__main__":
202     cache = LRUCache(25)
203     print cache
204     for i in range(50):
205         cache[i] = str(i)
206     print cache
207     if 46 in cache:
208         del cache[46]
209     print cache
210     cache.size = 10
211     print cache
212     cache[46] = '46'
213     print cache
214     print len(cache)
215     for c in cache:
216         print c
217     print cache
218     print cache.mtime(46)
219     for c in cache:
220         print c
Note: See TracBrowser for help on using the browser.