# Repository: ~sschwarzer/ftputil
# File: ftputil/ftputil/lrucache.py (-rw-r--r--, 9.9 KiB)
# Last commit: 77f2ca24, Stefan Schwarzer, 'Move item "Push to repository"', a month ago

# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
#
# Copyright 2009-2018 Stefan Schwarzer <sschwarzer@sschwarzer.net>
# (some changes to the original version)

# Licensed under the Academic Free License 2.1
#
# Additionally licensed for ftputil under the 3-clause BSD license
# with permission by the author, Evan Prodromou. This means you can
# use this module under either of the two licenses.
#
# Many thanks for your contribution, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

"""a simple LRU (Least-Recently-Used) cache module

This module provides very simple LRU (Least-Recently-Used) cache
functionality.

An *in-memory cache* is useful for storing the results of an
'expensive' process (one that takes a lot of time or resources) for
later re-use. Typical examples are accessing data from the filesystem,
a database, or a network location. If you know you'll need to read
the data again, it can help to keep it in a cache.

You *can* use a Python dictionary as a cache for some purposes.
However, if the results you're caching are large, or you have a lot of
possible results, this can be impractical memory-wise.

An *LRU cache*, on the other hand, only keeps _some_ of the results in
memory, which keeps you from overusing resources. The cache is bounded
by a maximum size; if you try to add more values to the cache, it will
automatically discard the values that you haven't read or written to
in the longest time. In other words, the least-recently-used items are
discarded. [1]_

.. [1] 'Discarded' here means 'removed from the cache'.

"""

import time


# Version of this module. The suffix after the hyphen denotes
# modifications by the ftputil project with respect to the original
# version.
__version__ = "0.2-15"
# Names exported by a star import of this module.
__all__ = ["CacheKeyError", "LRUCache", "DEFAULT_SIZE"]
# Markup format and language used for the docstrings in this module.
__docformat__ = "reStructuredText en"

# Default size of a new LRUCache object, if no 'size' argument is given.
DEFAULT_SIZE = 16


class CacheKeyError(KeyError):
    """Raised when a requested key is not present in the cache.

    The cache raises this error when a record is read or deleted that
    was never stored or has already been removed. Because it derives
    from `KeyError`, code that catches `KeyError` also catches this
    exception. To avoid the error, test for the key (`key in cache`)
    before reading or deleting it.
    """


class LRUCache:
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32) # new cache
        cache['foo'] = get_file_contents('foo') # or whatever

        if 'foo' in cache: # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print(cache.size) # Maximum size

        print(len(cache)) # 0 <= len(cache) <= cache.size

        cache.size = 10 # Auto-shrink on size assignment

        for i in range(50): # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print('Zero was discarded.')

        if 42 in cache:
            del cache[42] # Manual deletion

        for j in cache:   # iterate (in LRU order)
            print(j, cache[j]) # iterator produces keys, not values
    """

    class _Node:
        """Record of a cached value. Not for public consumption.

        A node stores the key (`key`), the cached object (`obj`), the
        time of the last access (`atime`), the time of the last
        modification (`mtime`) and the ordering value `_sort_key`.
        Nodes compare by `_sort_key` only, so the smallest node is the
        least recently used one.
        """

        def __init__(self, key, obj, timestamp, sort_key):
            self.key = key
            self.obj = obj
            # A new node counts as accessed and modified at the same time.
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __lt__(self, other):
            # Seems to be preferred over `__cmp__`, at least in newer
            # Python versions. Uses only around 60 % of the time
            # with respect to `__cmp__`.
            # pylint: disable=protected-access
            return self._sort_key < other._sort_key

        def __repr__(self):
            return "<%s %s => %s (%s)>" % (
                self.__class__,
                self.key,
                self.obj,
                time.asctime(time.localtime(self.atime)),
            )

    def __init__(self, size=DEFAULT_SIZE):
        """Init the `LRUCache` object. `size` is the initial
        _maximum_ size of the cache. The size can be changed by
        setting the `size` attribute.

        Raise a `TypeError` if `size` isn't an integer and a
        `ValueError` if it isn't positive.
        """
        # The containers must exist before `size` is assigned because
        # `__setattr__` accesses them when the size is set.
        self.clear()
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded. This assignment implicitly checks the size value.
        self.size = size

    def clear(self):
        """Clear the cache, removing all elements.

        The `size` attribute of the cache isn't modified.
        """
        # pylint: disable=attribute-defined-outside-init
        # List of all nodes; only sorted when an ordering is needed.
        self.__heap = []
        # Mapping from key to node, for fast lookup by key.
        self.__dict = {}
        # Source of the monotonically increasing sort keys.
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        `time.time()` (and the former `time.clock()`) don't guarantee
        this in a platform-independent way.

        See http://ftputil.sschwarzer.net/trac/ticket/32 for details.
        """
        self.__counter += 1
        return self.__counter

    def _node_for(self, key):
        """Return the node stored under `key`.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        try:
            return self.__dict[key]
        except KeyError:
            # Hide the internal `KeyError`; callers should only see
            # the cache-specific exception.
            raise CacheKeyError(key) from None

    def __len__(self):
        """Return _current_ number of cache entries.

        This may be different from the value of the `size`
        attribute.
        """
        return len(self.__heap)

    def __contains__(self, key):
        """Return `True` if the item denoted by `key` is in the cache."""
        return key in self.__dict

    def __setitem__(self, key, obj):
        """Store item `obj` in the cache under the key `key`.

        If the number of elements after the addition of a new key
        would exceed the maximum cache size, the least recently
        used item in the cache is "forgotten".
        """
        heap = self.__heap
        dict_ = self.__dict
        if key in dict_:
            node = dict_[key]
            # Update node object in-place.
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            # pylint: disable=protected-access
            node._sort_key = self._sort_key()
        else:
            # The size of the heap can be at most the value of
            # `self.size` because `__setattr__` decreases the cache
            # size if the new size value is smaller; so we don't
            # need a loop _here_. `>=` instead of `==` is purely
            # defensive.
            if len(heap) >= self.size:
                # The node with the smallest sort key is the least
                # recently used one.
                lru_node = min(heap)
                heap.remove(lru_node)
                del dict_[lru_node.key]
            node = self._Node(key, obj, time.time(), self._sort_key())
            dict_[key] = node
            heap.append(node)

    def __getitem__(self, key):
        """Return the item stored under `key` key.

        Reading an item marks it as the most recently used one.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        node = self._node_for(key)
        # Update node object in-place.
        node.atime = time.time()
        # pylint: disable=protected-access
        node._sort_key = self._sort_key()
        return node.obj

    def __delitem__(self, key):
        """Delete the item stored under `key` key.

        If no such key is present in the cache, raise a
        `CacheKeyError`.

        The removed object is returned. A `del cache[key]` statement
        ignores this value; it's only visible if the method is called
        directly.
        """
        node = self._node_for(key)
        self.__heap.remove(node)
        del self.__dict[key]
        return node.obj

    def __iter__(self):
        """Iterate over the cache, from the least to the most
        recently accessed item.

        The iterator yields keys, not values. It works on a snapshot
        of the cache contents taken when the iteration starts, so
        reading, adding or deleting items while iterating doesn't
        disturb the iteration.
        """
        for node in sorted(self.__heap):
            yield node.key

    def __setattr__(self, name, value):
        """If the name of the attribute is "size", set the
        _maximum_ size of the cache to the supplied value.

        If the new size is smaller than the current number of
        entries, the least recently used entries are discarded until
        the cache fits the new size.

        Raise a `TypeError` if the size isn't an integer and a
        `ValueError` if it isn't positive. In both cases the previous
        size stays in effect.

        All other attributes are set without any special handling.
        """
        if name != "size":
            super().__setattr__(name, value)
            return
        # Validate _before_ storing the value, so that a rejected
        # size doesn't leave the cache in an inconsistent state.
        if not isinstance(value, int):
            # Wrap `value` in a tuple; otherwise a tuple value would
            # be taken as several format arguments.
            raise TypeError("cache size (%r) must be an integer" % (value,))
        if value <= 0:
            raise ValueError("cache size (%d) must be positive" % value)
        super().__setattr__(name, value)
        # Automagically shrink heap on resize.
        heap = self.__heap
        dict_ = self.__dict
        node_count_to_remove = len(heap) - value
        # Do we need to remove anything at all?
        if node_count_to_remove <= 0:
            return
        # Remove enough nodes to reach the new size. After sorting,
        # the least recently used nodes are at the start of the list.
        heap.sort()
        for node in heap[:node_count_to_remove]:
            del dict_[node.key]
        del heap[:node_count_to_remove]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        "stale", such as caching file or network resource contents.

        If no such key is present in the cache, raise a
        `CacheKeyError`.
        """
        return self._node_for(key).mtime


if __name__ == "__main__":
    # Small demonstration of the cache, run only when this module is
    # executed as a script.
    cache = LRUCache(25)
    print(cache)
    # Store twice as many items as the maximum size of the cache.
    for number in range(50):
        cache[number] = str(number)
    print(cache)
    # Manual deletion of a single item, if it's still cached.
    if 46 in cache:
        del cache[46]
    print(cache)
    # Reduce the maximum size of the cache.
    cache.size = 10
    print(cache)
    cache[46] = "46"
    print(cache)
    print(len(cache))
    # Iterating over the cache produces the keys.
    for cached_key in cache:
        print(cached_key)
    print(cache)
    print(cache.mtime(46))
    for cached_key in cache:
        print(cached_key)