199 lines
6.6 KiB
Python
199 lines
6.6 KiB
Python
# Copyright 2007 Matt Chaput. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
|
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
|
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
|
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# The views and conclusions contained in the software and documentation are
|
|
# those of the authors and should not be interpreted as representing official
|
|
# policies, either expressed or implied, of Matt Chaput.
|
|
|
|
from __future__ import division
|
|
from array import array
|
|
|
|
from whoosh import matching
|
|
from whoosh.compat import text_type, u, xrange
|
|
from whoosh.query import qcore
|
|
|
|
|
|
class WrappingQuery(qcore.Query):
    """Base class for queries that wrap exactly one child query.

    Unless a subclass overrides them, the size estimates, field, requirements
    and matcher are all taken straight from the wrapped child.
    """

    def __init__(self, child):
        """
        :param child: the query object to wrap.
        """

        self.child = child

    def __repr__(self):
        classname = self.__class__.__name__
        return "%s(%r)" % (classname, self.child)

    def __hash__(self):
        # Combine the wrapper's class name with the child's hash
        name_hash = hash(self.__class__.__name__)
        return name_hash ^ hash(self.child)

    def _rewrap(self, child):
        """Returns a new instance of this wrapper's class around ``child``.
        """

        cls = self.__class__
        return cls(child)

    def is_leaf(self):
        # A wrapper always has a child, so it is never a leaf
        return False

    def children(self):
        """Yields the single wrapped query.
        """

        yield self.child

    def apply(self, fn):
        """Calls ``fn`` on the child and re-wraps the result.
        """

        transformed = fn(self.child)
        return self._rewrap(transformed)

    def requires(self):
        # Delegated to the wrapped query
        wrapped = self.child
        return wrapped.requires()

    def field(self):
        # Delegated to the wrapped query
        wrapped = self.child
        return wrapped.field()

    def with_boost(self, boost):
        """Returns a re-wrapped copy whose child is the result of calling
        ``with_boost(boost)`` on the current child.
        """

        boosted = self.child.with_boost(boost)
        return self._rewrap(boosted)

    def estimate_size(self, ixreader):
        # Delegated to the wrapped query
        wrapped = self.child
        return wrapped.estimate_size(ixreader)

    def estimate_min_size(self, ixreader):
        # Delegated to the wrapped query
        wrapped = self.child
        return wrapped.estimate_min_size(ixreader)

    def matcher(self, searcher, context=None):
        """Returns the child's matcher, passing ``searcher`` and ``context``
        through unchanged.
        """

        wrapped = self.child
        return wrapped.matcher(searcher, context)
|
|
|
|
|
|
class Not(qcore.Query):
    """Excludes any documents that match the subquery.

    >>> # Match documents that contain 'render' but not 'texture'
    >>> And([Term("content", u"render"),
    ...      Not(Term("content", u"texture"))])
    >>> # You can also do this
    >>> Term("content", u"render") - Term("content", u"texture")
    """

    __inittypes__ = dict(query=qcore.Query)

    def __init__(self, query, boost=1.0):
        """
        :param query: A :class:`Query` object. The results of this query
            are *excluded* from the parent query.
        :param boost: Boost is meaningless for excluded documents but this
            keyword argument is accepted for the sake of a consistent
            interface.
        """

        self.query = query
        self.boost = boost

    def __eq__(self, other):
        # Two Not queries are equal when they are of the same class and wrap
        # equal subqueries. The boost is deliberately not compared (it is
        # meaningless for excluded documents).
        if self.__class__ is not other.__class__:
            return False
        return self.query == other.query

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.query))

    def __unicode__(self):
        return u("NOT ") + text_type(self.query)

    __str__ = __unicode__

    def __hash__(self):
        # Must stay consistent with __eq__: objects that compare equal have to
        # hash equal, and __eq__ ignores the boost, so the boost is not mixed
        # into the hash either.
        return hash(self.__class__.__name__) ^ hash(self.query)

    def is_leaf(self):
        return False

    def children(self):
        """Yields the single excluded subquery.
        """

        yield self.query

    def apply(self, fn):
        """Returns a new query of the same class wrapping ``fn(self.query)``.
        The boost is carried over, as it is in :meth:`normalize`.
        """

        return self.__class__(fn(self.query), boost=self.boost)

    def normalize(self):
        """Normalizes the subquery. If the subquery normalizes to
        ``NullQuery``, that is returned as-is; otherwise a new query of the
        same class (with the same boost) wrapping the normalized subquery is
        returned.
        """

        q = self.query.normalize()
        if q is qcore.NullQuery:
            return q
        return self.__class__(q, boost=self.boost)

    def field(self):
        return None

    def estimate_size(self, ixreader):
        # Upper bound: the reader's document count
        return ixreader.doc_count()

    def estimate_min_size(self, ixreader):
        return 1 if ixreader.doc_count() else 0

    def matcher(self, searcher, context=None):
        """Returns an inverse matcher over the subquery's matcher.

        Note that the ``context`` argument is not used: the subquery is
        always matched with ``searcher.boolean_context()``.
        """

        # Usually only called if Not is the root query. Otherwise, queries such
        # as And and Or do special handling of Not subqueries.
        reader = searcher.reader()
        child = self.query.matcher(searcher, searcher.boolean_context())
        return matching.InverseMatcher(child, reader.doc_count_all(),
                                       missing=reader.is_deleted)
|
|
|
|
|
|
class ConstantScoreQuery(WrappingQuery):
    """Wraps another query so that every document it matches receives the
    same fixed score. Useful as an optimization for a branch of the query
    tree that only acts as a filter, where the individual scores do not
    matter. See also the :class:`AndMaybe` query.
    """

    def __init__(self, child, score=1.0):
        """
        :param child: the query object to wrap.
        :param score: the weight given to every matching document.
        """

        WrappingQuery.__init__(self, child)
        self.score = score

    def __eq__(self, other):
        same_kind = other and self.__class__ is other.__class__
        return (same_kind
                and self.child == other.child
                and self.score == other.score)

    def __hash__(self):
        child_hash = hash(self.child)
        return child_hash ^ hash(self.score)

    def _rewrap(self, child):
        # Keep the configured score when wrapping a replacement child
        cls = self.__class__
        return cls(child, self.score)

    def matcher(self, searcher, context=None):
        """Returns a matcher for the wrapped query.

        If ``context.needs_current`` is set, or the child produces a null
        matcher, the child's own matcher is returned unchanged. Otherwise the
        child's document IDs are read into an array and served from a list
        matcher that gives every document the weight ``self.score``.
        """

        from whoosh.searching import SearchContext

        if not context:
            context = SearchContext()
        inner = self.child.matcher(searcher, context)

        if context.needs_current:
            return inner
        if isinstance(inner, matching.NullMatcherClass):
            return inner

        docnums = array("I", inner.all_ids())
        return matching.ListMatcher(docnums, all_weights=self.score,
                                    term=inner.term())
|
|
|
|
|
|
class WeightingQuery(WrappingQuery):
    """Wraps a query and uses a specific :class:`whoosh.scoring.WeightingModel`
    to score documents that match the wrapped query.
    """

    def __init__(self, child, weighting):
        """
        :param child: the query object to wrap.
        :param weighting: the weighting object to place in the search context
            used when matching the wrapped query.
        """

        WrappingQuery.__init__(self, child)
        self.weighting = weighting

    def _rewrap(self, child):
        # The constructor requires the weighting, so the inherited
        # single-argument _rewrap() would fail in apply() and with_boost()
        return self.__class__(child, self.weighting)

    def matcher(self, searcher, context=None):
        """Returns the child's matcher, built with a search context whose
        weighting is replaced by the one configured on this query.

        :param searcher: passed through to the child's ``matcher()``.
        :param context: the incoming search context; when it is not given a
            default ``SearchContext`` is created.
        """

        # Local import, as in ConstantScoreQuery.matcher
        from whoosh.searching import SearchContext

        context = context or SearchContext()
        # Replace the passed-in weighting with the one configured on this
        # query. SearchContext.set() returns an updated copy instead of
        # modifying the context in place, so the returned object is the one
        # that must be handed to the child.
        ctx = context.set(weighting=self.weighting)
        return self.child.matcher(searcher, ctx)
|