source: lrucache.py @ 784:26ec6614a1af

# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module
"""a simple LRU (Least-Recently-Used) cache module

This module provides very simple LRU (Least-Recently-Used) cache
functionality.

An *in-memory cache* is useful for storing the results of an
'expensive' process (one that takes a lot of time or resources) for
later re-use. Typical examples are accessing data from the filesystem,
a database, or a network location. If you know you'll need to read
the data again, it can help to keep it in a cache.

You *can* use a Python dictionary as a cache for some purposes.
However, if the results you're caching are large, or you have many
possible results, this can be impractical memory-wise.

An *LRU cache*, on the other hand, only keeps _some_ of the results in
memory, which keeps you from overusing resources. The cache is bounded
by a maximum size; if you try to add more values to the cache, it will
automatically discard the values that you haven't read or written
in the longest time. In other words, the least-recently-used items are
discarded. [1]_

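A short usage sketch (the behavior shown assumes the system clock
advances between operations, since recency is tracked with
'time.time()')::

    cache = LRUCache(size=2)
    cache['a'] = 1
    cache['b'] = 2
    cache['a']          # reading 'a' makes 'b' the least recently used
    cache['c'] = 3      # cache is full, so 'b' is discarded
    'b' in cache        # -> False
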
.. [1] 'Discarded' here means 'removed from the cache'.

"""

import time
from ftputil_heapq import heappush, heappop, heapify

__version__ = "0.2"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when a cache request fails.

    Raised when a cache record is accessed that no longer exists (or
    never existed). To avoid this error, check for the existence of a
    cache record before reading or deleting it."""
    pass

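# A minimal sketch of guarding against CacheKeyError; the 'cache'
# instance and the 'recompute' helper are hypothetical:
#
#     try:
#         value = cache[key]
#     except CacheKeyError:
#         value = recompute(key)
#         cache[key] = value
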
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32) # new cache
        cache['foo'] = get_file_contents('foo') # or whatever

        if 'foo' in cache: # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size # Maximum size

        print len(cache) # 0 <= len(cache) <= cache.size

        cache.size = 10 # Auto-shrink on size assignment

        for i in range(50): # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42] # Manual deletion

        for j in cache:   # iterate (in LRU order)
            print j, cache[j] # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime

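        # Nodes compare by access time, so the root of a heap of nodes
        # is always the least-recently-used record.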
        def __cmp__(self, other):
            return cmp(self.atime, other.atime)

        def __repr__(self):
            return "Node %s => %s" % (self.key, self.obj)
#             return "<%s %s => %s (%s)>" % \
#                    (self.__class__, self.key, self.obj, \
#                     time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments. The type check has to come first; a 'size'
        # of the wrong type should give a TypeError, not be compared
        # against zero.
        if not isinstance(size, int):
            raise TypeError(size)
        if size <= 0:
            raise ValueError(size)
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        self.size = size
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return key in self.__dict

    def __setitem__(self, key, obj):
        if key in self.__dict:
            node = self.__dict[key]
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
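            # The updated access time invalidates the heap order, so
            # the heap has to be rebuilt.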
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict[key]
        node.atime = time.time()
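        # Reading counts as a use, so the refreshed access time
        # requires rebuilding the heap as well.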
        heapify(self.__heap)
        return node.obj

    def __delitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        node = self.__dict.pop(key)
        # 'self.__heap.remove(node)' would compare nodes with '__cmp__'
        # and so might remove a *different* node that happens to have
        # the same access time. Scan for the identical node instead.
        for index, heap_node in enumerate(self.__heap):
            if heap_node is node:
                del self.__heap[index]
                break
        heapify(self.__heap)
        return node.obj

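    # Iteration yields the keys from least- to most-recently used. It
    # works on a copy of the heap, so the cache itself isn't reordered.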
    def __iter__(self):
        copy = self.__heap[:]
        while copy:
            node = heappop(copy)
            yield node.key

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time of the cache record for 'key'.

        May be useful for cache instances where the stored values can
        get 'stale', such as caching file or network resource contents."""
        if key not in self.__dict:
            raise CacheKeyError(key)
        return self.__dict[key].mtime

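# A small sketch of using 'mtime' to drop entries that have become
# stale; the 'max_age' threshold (in seconds) is hypothetical:
#
#     if key in cache and time.time() - cache.mtime(key) > max_age:
#         del cache[key]
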
if __name__ == "__main__":
    cache = LRUCache(25)
    for i in range(30):
        cache[i] = str(i)
    if 26 in cache:
        del cache[26]
    # shrinking the cache discards the least-recently-used entries
    cache.size = 10