source: lrucache.py @ 787:0b29d67d4210

Last change on this file since 787:0b29d67d4210 was 787:0b29d67d4210, checked in by Stefan Schwarzer <sschwarzer@…>, 12 years ago
Use a counter instead of `time.time()` to determine the heap order. This fixes bug #32.
File size: 7.2 KB
Line 
1# lrucache.py -- a simple LRU (Least-Recently-Used) cache class
2
3# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
4# Licensed under the Academic Free License 2.1
5
6# Licensed for ftputil under the revised BSD license
7# with permission by the author, Evan Prodromou. Many
8# thanks, Evan! :-)
9#
10# The original file is available at
11# http://pypi.python.org/pypi/lrucache/0.2 .
12
13# arch-tag: LRU cache main module
14
15"""a simple LRU (Least-Recently-Used) cache module
16
17This module provides very simple LRU (Least-Recently-Used) cache
18functionality.
19
20An *in-memory cache* is useful for storing the results of an
21'expensive' process (one that takes a lot of time or resources) for
22later re-use. Typical examples are accessing data from the filesystem,
23a database, or a network location. If you know you'll need to re-read
24the data again, it can help to keep it in a cache.
25
26You *can* use a Python dictionary as a cache for some purposes.
27However, if the results you're caching are large, or you have a lot of
28possible results, this can be impractical memory-wise.
29
30An *LRU cache*, on the other hand, only keeps _some_ of the results in
31memory, which keeps you from overusing resources. The cache is bounded
32by a maximum size; if you try to add more values to the cache, it will
33automatically discard the values that you haven't read or written to
34in the longest time. In other words, the least-recently-used items are
35discarded. [1]_
36
.. [1] 'Discarded' here means 'removed from the cache'.
38
39"""
40
41from __future__ import generators
42import time
43from heapq import heappush, heappop, heapify
44
# Package metadata.
__version__ = "0.2.1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

# Fallback capacity used by `LRUCache()` when no explicit size is given.
DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""
51
class CacheKeyError(KeyError):
    """Raised when a cache request fails.

    Accessing a cache record that no longer exists (or never did)
    triggers this error.  To avoid it, you may want to check for the
    existence of a cache record before reading or deleting it."""
59
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

    cache = LRUCache(32) # new cache
    cache['foo'] = get_file_contents('foo') # or whatever

    if 'foo' in cache: # if it's still in cache...
        # use cached version
        contents = cache['foo']
    else:
        # recalculate
        contents = get_file_contents('foo')
        # store in cache for next time
        cache['foo'] = contents

    print(cache.size) # Maximum size

    print(len(cache)) # 0 <= len(cache) <= cache.size

    cache.size = 10 # Auto-shrink on size assignment

    for i in range(50): # note: larger than cache size
        cache[i] = i

    if 0 not in cache: print('Zero was discarded.')

    if 42 in cache:
        del cache[42] # Manual deletion

    for j in cache:   # iterate (in LRU order)
        print(j, cache[j]) # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key           # cache key, needed for eviction bookkeeping
            self.obj = obj           # the cached value
            self.atime = timestamp   # last access time (time.time())
            self.mtime = self.atime  # last modification time
            # Monotonically increasing integer that determines the heap
            # (LRU) order; see LRUCache._sort_key.
            self._sort_key = sort_key

        def __lt__(self, other):
            # heapq only requires "<".  Ordering by the insertion counter
            # instead of by `atime` keeps the order well-defined even when
            # two accesses happen within the clock's resolution (bug #32).
            # (Replaces the Python-2-only `__cmp__`/`cmp` pair; `__lt__`
            # works under both Python 2 and 3.)
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj, \
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        """Create a cache holding at most `size` entries.

        Raises TypeError if `size` is not an integer and ValueError if it
        is not positive."""
        # Check the type before the value, so that a non-integer argument
        # reliably raises TypeError instead of depending on how "<="
        # behaves for that type.
        if not isinstance(size, int):
            raise TypeError(size)
        if size <= 0:
            raise ValueError(size)
        object.__init__(self)
        self.__heap = []     # nodes ordered by _sort_key; heap[0] is the LRU
        self.__dict = {}     # key -> node, for O(1) lookup
        # Counter backing _sort_key(); must exist before any item is added.
        self.__counter = 0
        # Maximum size of the cache.  If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded.  (Assigning to `size` goes through __setattr__, which
        # auto-shrinks the heap.)
        self.size = size

    def _sort_key(self):
        """Return a new, strictly increasing integer value upon every call."""
        self.__counter += 1
        return self.__counter

    def __len__(self):
        """Return the number of currently cached entries."""
        return len(self.__heap)

    def __contains__(self, key):
        """Return whether `key` is currently cached."""
        # `in` instead of the Python-2-only dict.has_key().
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store `obj` under `key`, evicting LRU entries if the cache is full."""
        if key in self.__dict:
            node = self.__dict[key]
            # Update the node object in-place, then repair the heap order
            # invalidated by the new sort key.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        """Return the cached value for `key` and record the access.

        Raises CacheKeyError if `key` is not (or no longer) cached."""
        try:
            node = self.__dict[key]
        except KeyError:
            raise CacheKeyError(key)
        # Update the node object in-place; the changed sort key requires
        # re-heapifying (O(n), the price of the simple heap design).
        node.atime = time.time()
        node._sort_key = self._sort_key()
        heapify(self.__heap)
        return node.obj

    def __delitem__(self, key):
        """Remove the record for `key` and return its value.

        Raises CacheKeyError if `key` is not (or no longer) cached."""
        try:
            node = self.__dict.pop(key)
        except KeyError:
            raise CacheKeyError(key)
        # list.remove is O(n); acceptable for the small caches this
        # class targets.
        self.__heap.remove(node)
        heapify(self.__heap)
        return node.obj

    def __iter__(self):
        """Yield the cached keys in LRU order (least recently used first)."""
        # Operate on a copy so iteration does not disturb the live heap.
        copy = self.__heap[:]
        while copy:
            node = heappop(copy)
            yield node.key
        # NOTE: no `raise StopIteration` here -- simply returning ends the
        # generator; an explicit raise is an error under PEP 479.

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents.

        Raises CacheKeyError if `key` is not (or no longer) cached."""
        try:
            node = self.__dict[key]
        except KeyError:
            raise CacheKeyError(key)
        return node.mtime
211
if __name__ == "__main__":
    # Smoke test / demo.  Single-argument `print(...)` behaves identically
    # under Python 2 (parentheses are a no-op grouping) and Python 3,
    # unlike the former `print x` statements.
    cache = LRUCache(25)
    print(cache)
    # Overfill the cache; the 25 least-recently-used keys get evicted.
    for i in range(50):
        cache[i] = str(i)
    print(cache)
    # Manual deletion.
    if 46 in cache:
        del cache[46]
    print(cache)
    # Assigning to `size` auto-shrinks the cache.
    cache.size = 10
    print(cache)
    cache[46] = '46'
    print(cache)
    print(len(cache))
    # Iteration yields keys in LRU order.
    for c in cache:
        print(c)
    print(cache)
    print(cache.mtime(46))
    for c in cache:
        print(c)
Note: See TracBrowser for help on using the repository browser.