source: lrucache.py @ 786:4da2efa4bf8f

Last change on this file: 786:4da2efa4bf8f, checked in by Stefan Schwarzer <sschwarzer@…>, 12 years ago
Reverted changes which should have gone into the issue32 branch.
File size: 6.8 KB
# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

15"""a simple LRU (Least-Recently-Used) cache module
16
17This module provides very simple LRU (Least-Recently-Used) cache
18functionality.
19
20An *in-memory cache* is useful for storing the results of an
21'expensive' process (one that takes a lot of time or resources) for
22later re-use. Typical examples are accessing data from the filesystem,
23a database, or a network location. If you know you'll need to re-read
24the data again, it can help to keep it in a cache.
25
26You *can* use a Python dictionary as a cache for some purposes.
27However, if the results you're caching are large, or you have a lot of
28possible results, this can be impractical memory-wise.
29
30An *LRU cache*, on the other hand, only keeps _some_ of the results in
31memory, which keeps you from overusing resources. The cache is bounded
32by a maximum size; if you try to add more values to the cache, it will
33automatically discard the values that you haven't read or written to
34in the longest time. In other words, the least-recently-used items are
35discarded. [1]_
36
37.. [1]: 'Discarded' here means 'removed from the cache'.
38
39"""

from __future__ import generators
import time
from heapq import heappush, heappop, heapify

__version__ = "0.2"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it."""

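# A minimal usage sketch for the error above: since an entry may be
# evicted between a write and a later read, callers can catch
# CacheKeyError and recompute (`recompute` is a hypothetical helper):
#
#     try:
#         value = cache[key]
#     except CacheKeyError:
#         value = recompute(key)
#         cache[key] = value
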
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32) # new cache
        cache['foo'] = get_file_contents('foo') # or whatever

        if 'foo' in cache: # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size # Maximum size

        print len(cache) # 0 <= len(cache) <= cache.size

        cache.size = 10 # Auto-shrink on size assignment

        for i in range(50): # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42] # Manual deletion

        for j in cache:   # iterate (in LRU order)
            print j, cache[j] # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime

        def __cmp__(self, other):
            # Nodes are ordered by access time, so the heap root is
            # always the least-recently-used record.
            return cmp(self.atime, other.atime)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj, \
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments. Test the type first, so that a non-integer
        # size is reported as a TypeError, not as a ValueError.
        if type(size) is not type(0):
            raise TypeError(size)
        elif size <= 0:
            raise ValueError(size)
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        self.size = size
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return key in self.__dict

    def __setitem__(self, key, obj):
        if key in self.__dict:
            # Update the cached object in place and mark the record as
            # both accessed and modified.
            node = self.__dict[key]
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            # The access time changed, so restore the heap invariant.
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # Reading refreshes the access time, so the heap invariant
            # must be restored.
            node.atime = time.time()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        # Iterate over a copy of the heap, so iteration doesn't disturb
        # the cache itself; keys are yielded in LRU order. (The
        # generator simply returns when the copy is exhausted; an
        # explicit `raise StopIteration` is redundant.)
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with
        the given key. May be useful for cache instances where the
        stored values can get 'stale', such as caching file or network
        resource contents."""
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

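# A minimal sketch of using `mtime` to refresh stale entries; `fetch`
# and `MAX_AGE` are hypothetical names:
#
#     if time.time() - cache.mtime('page') > MAX_AGE:
#         cache['page'] = fetch('page')   # refresh the stale entry
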
if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c
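    # A small recency demonstration (a sketch; it assumes time.time()
    # advances between operations, which may not hold for very coarse
    # clocks): reading an entry refreshes it, so an untouched entry is
    # evicted first.
    small = LRUCache(2)
    small['a'] = 1
    small['b'] = 2
    unused = small['a']   # 'a' becomes the most recently used
    small['c'] = 3        # evicts 'b', the least recently used
    print 'b' in small    # expected: False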