# Copyright 2007 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from __future__ import with_statement
import functools, random
from array import array
from heapq import nsmallest
from operator import itemgetter
from threading import Lock
from time import time
from whoosh.compat import iteritems, xrange

try:
    from collections import Counter
except ImportError:
    class Counter(dict):
        def __missing__(self, key):
            return 0


def unbound_cache(func):
    """Caching decorator with an unbounded cache size.
    """

    cache = {}

    @functools.wraps(func)
    def caching_wrapper(*args):
        try:
            return cache[args]
        except KeyError:
            result = func(*args)
            cache[args] = result
            return result

    return caching_wrapper
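

# Illustrative usage sketch (not part of the original module): a function
# decorated with unbound_cache memoizes results keyed by its positional
# arguments. The helper name _unbound_cache_example and the fib function are
# hypothetical.
def _unbound_cache_example():
    @unbound_cache
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    # Recursive calls go through the caching wrapper, so each distinct n is
    # computed once. Note the cache never evicts, so it grows without bound.
    return fib(30)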


def lru_cache(maxsize=100):
    """A simple cache that, when the cache is full, deletes the least recently
    used 10% of the cached values.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0]  # Hits, misses
        data = {}
        lastused = {}

        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) == maxsize:
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          iteritems(lastused),
                                          key=itemgetter(1)):
                        del data[k]
                        del lastused[k]
                data[args] = user_function(*args)
                result = data[args]
            finally:
                lastused[args] = time()
            return result

        def cache_info():
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            data.clear()
            lastused.clear()
            stats[0] = stats[1] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper

    return decorating_function
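

# Illustrative usage sketch (not part of the original module): demonstrates
# the cache_info()/cache_clear() protocol described in the docstring above.
# The helper name _lru_cache_example and the square function are hypothetical.
def _lru_cache_example():
    @lru_cache(maxsize=10)
    def square(x):
        return x * x

    square(3)  # miss
    square(3)  # hit
    hits, misses, maxsize, currsize = square.cache_info()
    assert (hits, misses, maxsize, currsize) == (1, 1, 10, 1)
    square.cache_clear()
    assert square.cache_info() == (0, 0, 10, 0)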


def lfu_cache(maxsize=100):
    """A simple cache that, when the cache is full, deletes the least
    frequently used 10% of the cached values.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0]  # Hits, misses
        data = {}
        usecount = Counter()

        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) == maxsize:
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          iteritems(usecount),
                                          key=itemgetter(1)):
                        del data[k]
                        del usecount[k]
                data[args] = user_function(*args)
                result = data[args]
            finally:
                usecount[args] += 1
            return result

        def cache_info():
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            data.clear()
            usecount.clear()
            stats[0] = stats[1] = 0  # Reset statistics, as documented above

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper

    return decorating_function
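

# Illustrative usage sketch (not part of the original module): lfu_cache keeps
# the most frequently used entries when it evicts. The helper name
# _lfu_cache_example and the upper function are hypothetical.
def _lfu_cache_example():
    @lfu_cache(maxsize=10)
    def upper(s):
        return s.upper()

    upper("a")
    upper("a")  # "a" now has the highest use count
    upper("b")
    hits, misses, _, currsize = upper.cache_info()
    assert (hits, misses, currsize) == (1, 2, 2)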


def random_cache(maxsize=100):
    """A very simple cache that, when the cache is filled, deletes 10% of the
    cached values AT RANDOM.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0]  # hits, misses
        data = {}

        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) == maxsize:
                    # Materialize the keys so they can be popped; a dict keys
                    # view cannot be popped or mutated while deleting
                    keys = list(data.keys())
                    for i in xrange(maxsize // 10 or 1):
                        n = random.randint(0, len(keys) - 1)
                        k = keys.pop(n)
                        del data[k]
                data[args] = user_function(*args)
                result = data[args]
            return result

        def cache_info():
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            data.clear()
            stats[0] = stats[1] = 0  # Reset statistics, as documented above

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper

    return decorating_function
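

# Illustrative usage sketch (not part of the original module): random_cache
# follows the same protocol as the other decorators but evicts arbitrary
# entries, which keeps bookkeeping cheap when no access pattern is worth
# tracking. The helper name _random_cache_example and the lookup function are
# hypothetical.
def _random_cache_example():
    @random_cache(maxsize=100)
    def lookup(key):
        return hash(key)

    for i in range(150):  # more distinct keys than maxsize
        lookup(i)
    # The cache stays bounded: once it is full, 10% of the entries are
    # dropped at random before each new insertion.
    assert lookup.cache_info()[3] <= 100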


def db_lru_cache(maxsize=100):
    """Double-barrel least-recently-used cache decorator. This is a simple
    LRU algorithm that keeps a primary and secondary dict. Keys are checked
    in the primary dict, and then the secondary. Once the primary dict fills
    up, the secondary dict is cleared and the two dicts are swapped.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        # Cache1, Cache2, Pointer, Hits, Misses
        stats = [{}, {}, 0, 0, 0]

        @functools.wraps(user_function)
        def wrapper(*args):
            ptr = stats[2]
            a = stats[ptr]
            b = stats[not ptr]
            key = args
            if key in a:
                stats[3] += 1  # Hit
                return a[key]
            elif key in b:
                stats[3] += 1  # Hit
                return b[key]
            else:
                stats[4] += 1  # Miss
                result = user_function(*args)
                a[key] = result
                if len(a) >= maxsize:
                    stats[2] = not ptr
                    b.clear()
                return result

        def cache_info():
            return stats[3], stats[4], maxsize, len(stats[0]) + len(stats[1])

        def cache_clear():
            """Clear the cache and cache statistics"""
            stats[0].clear()
            stats[1].clear()
            stats[3] = stats[4] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper

    return decorating_function
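

# Illustrative usage sketch (not part of the original module): the
# double-barrel scheme trades exact LRU ordering for very cheap bookkeeping;
# only dict lookups and an occasional clear-and-swap are needed. The helper
# name _db_lru_cache_example and the negate function are hypothetical.
def _db_lru_cache_example():
    @db_lru_cache(maxsize=100)
    def negate(x):
        return -x

    negate(1)  # miss, stored in the primary dict
    negate(1)  # hit
    hits, misses, maxsize, currsize = negate.cache_info()
    assert (hits, misses, maxsize, currsize) == (1, 1, 100, 1)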


def clockface_lru_cache(maxsize=100):
    """Least-recently-used cache decorator.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library, but
    uses the clock face LRU algorithm instead of an ordered dictionary.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple (hits, misses, maxsize, currsize)
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0, 0]  # hits, misses, hand
        data = {}

        if maxsize:
            # The keys at each point on the clock face
            clock_keys = [None] * maxsize
            # The "referenced" bits at each point on the clock face
            clock_refs = array("B", (0 for _ in xrange(maxsize)))
            lock = Lock()

            @functools.wraps(user_function)
            def wrapper(*args):
                key = args
                try:
                    with lock:
                        pos, result = data[key]
                        # The key is in the cache. Set the key's reference bit
                        clock_refs[pos] = 1
                        # Record a cache hit
                        stats[0] += 1
                except KeyError:
                    # Compute the value
                    result = user_function(*args)
                    with lock:
                        # Current position of the clock hand
                        hand = stats[2]
                        # Remember to stop here after a full revolution
                        end = hand
                        # Sweep around the clock looking for a position with
                        # the reference bit off
                        while True:
                            hand = (hand + 1) % maxsize
                            current_ref = clock_refs[hand]
                            if current_ref:
                                # This position's "referenced" bit is set.
                                # Turn the bit off and move on.
                                clock_refs[hand] = 0
                            elif not current_ref or hand == end:
                                # We've either found a position with the
                                # "reference" bit off or reached the end of
                                # the circular cache. So we'll replace this
                                # position with the new key
                                current_key = clock_keys[hand]
                                if current_key in data:
                                    del data[current_key]
                                clock_keys[hand] = key
                                clock_refs[hand] = 1
                                break
                        # Put the key and result in the cache
                        data[key] = (hand, result)
                        # Save the new hand position
                        stats[2] = hand
                        # Record a cache miss
                        stats[1] += 1
                return result

        else:
            @functools.wraps(user_function)
            def wrapper(*args):
                key = args
                try:
                    result = data[key]
                    stats[0] += 1
                except KeyError:
                    result = user_function(*args)
                    data[key] = result
                    stats[1] += 1
                return result

        def cache_info():
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            """Clear the cache and cache statistics"""
            data.clear()
            stats[0] = stats[1] = stats[2] = 0
            if maxsize:
                # Only reset the clock face when it exists; with
                # maxsize=None there are no clock_keys/clock_refs to clear
                for i in xrange(maxsize):
                    clock_keys[i] = None
                    clock_refs[i] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper

    return decorating_function
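

# Illustrative usage sketch (not part of the original module): unlike the
# other decorators in this module, clockface_lru_cache takes a lock around
# cache updates, so it is the variant suited to functions called from multiple
# threads. The helper name _clockface_lru_cache_example and the cube function
# are hypothetical.
def _clockface_lru_cache_example():
    @clockface_lru_cache(maxsize=8)
    def cube(x):
        return x ** 3

    for x in range(20):  # more distinct keys than clock positions
        cube(x)
    hits, misses, maxsize, currsize = cube.cache_info()
    # Every distinct argument is a miss, and the clock face bounds the cache
    assert misses == 20 and currsize <= 8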