# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Most of this work is copyright (C) 2013-2021 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER
import enum
import hashlib
import heapq
import math
import sys
from collections import OrderedDict, abc
from functools import lru_cache

from hypothesis.errors import InvalidArgument
from hypothesis.internal.compat import floor, int_from_bytes
from hypothesis.internal.floats import int_to_float

LABEL_MASK = 2 ** 64 - 1

def calc_label_from_name(name: str) -> int:
    hashed = hashlib.sha384(name.encode()).digest()
    return int_from_bytes(hashed[:8])


def calc_label_from_cls(cls: type) -> int:
    return calc_label_from_name(cls.__qualname__)


def combine_labels(*labels: int) -> int:
    label = 0
    for l in labels:
        label = (label << 1) & LABEL_MASK
        label ^= l
    return label


INTEGER_RANGE_DRAW_LABEL = calc_label_from_name("another draw in integer_range()")
BIASED_COIN_LABEL = calc_label_from_name("biased_coin()")
BIASED_COIN_INNER_LABEL = calc_label_from_name("inside biased_coin()")
SAMPLE_IN_SAMPLER_LABEL = calc_label_from_name("a sample() in Sampler")
ONE_FROM_MANY_LABEL = calc_label_from_name("one more from many()")

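# Illustrative note (not part of the library): combine_labels folds labels
# together with a shift-and-xor under the 64-bit mask, so for two labels
# combine_labels(a, b) == ((a << 1) & LABEL_MASK) ^ b.  For example:
#
#     a = calc_label_from_name("lists()")
#     b = calc_label_from_name("integers()")
#     assert combine_labels(a, b) == ((a << 1) & LABEL_MASK) ^ b
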
def unbounded_integers(data):
    # Draw an integer with no explicit bounds: pick a bit-width from INT_SIZES
    # (weighted towards small sizes by INT_SIZES_SAMPLER, both defined below),
    # then use the lowest bit of the draw as a sign bit.
    size = INT_SIZES[INT_SIZES_SAMPLER.sample(data)]
    r = data.draw_bits(size)
    sign = r & 1
    r >>= 1
    if sign:
        r = -r
    return int(r)

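# Illustrative sketch (not part of the library): the sign-and-magnitude
# encoding above decodes an n-bit draw r as (-1 if r & 1 else 1) * (r >> 1).
# For example, r = 0b1011 gives -(0b101) == -5 and r = 0b1010 gives +5; zero
# has two encodings (0b0 and 0b1), which is harmless for generation purposes.
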
def integer_range(data, lower, upper, center=None):
    assert lower <= upper
    if lower == upper:
        # Write a value even when this is trivial so that when a bound depends
        # on other values we don't suddenly disappear when the gap shrinks to
        # zero - if that happens then often the data stream becomes misaligned
        # and we fail to shrink in cases where we really should be able to.
        data.draw_bits(1, forced=0)
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)
    if center == upper:
        above = False
    elif center == lower:
        above = True
    else:
        above = not boolean(data)

    if above:
        gap = upper - center
    else:
        gap = center - lower

    assert gap > 0

    bits = gap.bit_length()
    probe = gap + 1

    if bits > 24 and data.draw_bits(3):
        # For large ranges, we combine the uniform random distribution from draw_bits
        # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
        # choice of unicode characters is uniform but the 32bit distribution is not.
        idx = INT_SIZES_SAMPLER.sample(data)
        bits = min(bits, INT_SIZES[idx])

    while probe > gap:
        data.start_example(INTEGER_RANGE_DRAW_LABEL)
        probe = data.draw_bits(bits)
        data.stop_example(discard=probe > gap)

    if above:
        result = center + probe
    else:
        result = center - probe

    assert lower <= result <= upper
    return int(result)

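# Illustrative sketch (not part of the library): for integer_range(data, 0, 10)
# the center defaults to lower, so above is True and gap == 10.  gap.bit_length()
# is 4, so each probe is a uniform 4-bit draw in [0, 15]; draws above 10 are
# discarded (and marked as such so the shrinker can drop them later) and we
# redraw until probe <= gap, then return center + probe.  Shrinking the probe
# towards zero therefore shrinks the result towards the center.
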
def check_sample(values, strategy_name):
    if "numpy" in sys.modules and isinstance(values, sys.modules["numpy"].ndarray):
        if values.ndim != 1:
            raise InvalidArgument(
                "Only one-dimensional arrays are supported for sampling, "
                f"and the given value has {values.ndim} dimensions (shape "
                f"{values.shape}). This array would give samples of array slices "
                "instead of elements! Use np.ravel(values) to convert "
                "to a one-dimensional array, or tuple(values) if you "
                "want to sample slices."
            )
    elif not isinstance(values, (OrderedDict, abc.Sequence, enum.EnumMeta)):
        raise InvalidArgument(
            f"Cannot sample from {values!r}, not an ordered collection. "
            f"Hypothesis goes to some length to ensure that the {strategy_name} "
            "strategy has stable results between runs. To replay a saved "
            "example, the sampled values must have the same iteration order "
            "on every run - ruling out sets, dicts, etc due to hash "
            "randomisation. Most cases can simply use `sorted(values)`, but "
            "mixed types or special values such as math.nan require careful "
            "handling - and note that when simplifying an example, "
            "Hypothesis treats earlier values as simpler."
        )
    if isinstance(values, range):
        return values
    return tuple(values)


def choice(data, values):
    return values[integer_range(data, 0, len(values) - 1)]

FLOAT_PREFIX = 0b1111111111 << 52
FULL_FLOAT = int_to_float(FLOAT_PREFIX | ((2 << 53) - 1)) - 1


def fractional_float(data):
    return (int_to_float(FLOAT_PREFIX | data.draw_bits(52)) - 1) / FULL_FLOAT

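# Illustrative note (not part of the library): FLOAT_PREFIX sets the IEEE-754
# exponent bits of a double to 1023, so FLOAT_PREFIX | mantissa reinterprets a
# 52-bit draw as a float in [1.0, 2.0); subtracting 1 gives a value in
# [0.0, 1.0).  Dividing by FULL_FLOAT (the value produced by an all-ones
# mantissa) rescales that so the largest possible draw maps to exactly 1.0.
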
def boolean(data):
    return bool(data.draw_bits(1))

def biased_coin(data, p, *, forced=None):
    """Return True with probability p (assuming a uniform generator),
    shrinking towards False. If ``forced`` is set to a non-None value, this
    will always return that value but will write choices appropriate to having
    drawn that value randomly."""

    # NB this function is vastly more complicated than it may seem reasonable
    # for it to be. This is because it is used in a lot of places and it's
    # important for it to shrink well, so it's worth the engineering effort.

    if p <= 0 or p >= 1:
        bits = 1
    else:
        # When there is a meaningful draw, in order to shrink well we will
        # set things up so that 0 and 1 always correspond to False and True
        # respectively. This means we want enough bits available that in a
        # draw we will always have at least one truthy value and one falsey
        # value.
        bits = math.ceil(-math.log(min(p, 1 - p), 2))

    # In order to avoid stupidly large draws where the probability is
    # effectively zero or one, we treat probabilities of under 2^-64 as
    # effectively zero.
    if bits > 64:
        # There isn't enough precision near one for this to occur for values
        # of p far from zero.
        p = 0.0
        bits = 1

    size = 2 ** bits

    data.start_example(BIASED_COIN_LABEL)
    while True:
        # The logic here is a bit complicated and special cased to make it
        # play better with the shrinker.

        # We imagine partitioning the real interval [0, 1] into 2 ** bits
        # equal parts and looking at each part and whether its interior is
        # wholly <= p or wholly >= p. At most one part can be neither.

        # We then pick a random part. If it's wholly on one side or the other
        # of p then we use that as the answer. If p is contained in the
        # interval then we start again with a new probability that is given
        # by the fraction of that interval that was <= our previous p.

        # We then take advantage of the fact that we have control of the
        # labelling to make this shrink better, using the following tricks:

        # If p is <= 0 or >= 1 the result of this coin is certain. We make sure
        # to write a bit to the data stream anyway so that these don't cause
        # difficulties when shrinking.
        if p <= 0:
            data.draw_bits(1, forced=0)
            result = False
        elif p >= 1:
            data.draw_bits(1, forced=1)
            result = True
        else:
            falsey = floor(size * (1 - p))
            truthy = floor(size * p)
            remainder = size * p - truthy

            if falsey + truthy == size:
                partial = False
            else:
                partial = True

            if forced is None:
                # We want to get to the point where True is represented by
                # 1 and False is represented by 0 as quickly as possible, so
                # we use the remove_discarded machinery in the shrinker to
                # achieve that by discarding any draws that are > 1 and writing
                # a suitable draw into the choice sequence at the end of the
                # loop.
                data.start_example(BIASED_COIN_INNER_LABEL)
                i = data.draw_bits(bits)
                data.stop_example(discard=i > 1)
            else:
                i = data.draw_bits(bits, forced=int(forced))

            # We always label the region that causes us to repeat the loop as
            # size - 1 so that shrinking this draw never causes us to need to
            # draw more data.
            if partial and i == size - 1:
                p = remainder
                continue
            if falsey == 0:
                # Every other partition is truthy, so the result is true.
                result = True
            elif truthy == 0:
                # Every other partition is falsey, so the result is false.
                result = False
            elif i <= 1:
                # We special case so that zero is always false and 1 is always
                # true which makes shrinking easier because we can always
                # replace a truthy block with 1. This has the slightly weird
                # property that shrinking from 2 to 1 can cause the result to
                # grow, but the shrinker always tries 0 and 1 first anyway, so
                # this will usually be fine.
                result = bool(i)
            else:
                # Originally everything in the region 0 <= i < falsey was false
                # and everything above was true. We swapped one truthy element
                # into this region, so the region becomes 0 <= i <= falsey
                # except for i = 1. We know i > 1 here, so the test for truth
                # becomes i > falsey.
                result = i > falsey

            if i > 1:  # pragma: no branch
                # Thanks to bytecode optimisations on CPython >= 3.7 and PyPy
                # (see https://bugs.python.org/issue2506), coverage incorrectly
                # thinks that this condition is always true. You can trivially
                # check by adding `else: assert False` and running the tests.
                data.draw_bits(bits, forced=int(result))
        break

    data.stop_example()
    return result

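# Illustrative sketch (not part of the library): for p = 0.3 the setup above
# gives bits = ceil(-log2(0.3)) = 2, so size = 4, falsey = floor(4 * 0.7) = 2,
# truthy = floor(4 * 0.3) = 1 and remainder ~= 0.2.  A 2-bit draw of 0 or 2
# yields False, 1 yields True, and 3 (the partial partition) loops again with
# p ~= 0.2, so overall P(True) = 1/4 + 1/4 * 0.2 = 0.3 as required.
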
class Sampler:
    """Sampler based on Vose's algorithm for the alias method. See
    http://www.keithschwarz.com/darts-dice-coins/ for a good explanation.

    The general idea is that we store a table of triples (base, alternate, p).
    We then pick a triple uniformly at random, and choose its alternate
    value with probability p and else choose its base value. The triples are
    chosen so that the resulting mixture has the right distribution.

    We maintain the following invariants to try to produce good shrinks:

    1. The table is in lexicographic (base, alternate) order, so that choosing
       an earlier value in the list always lowers (or at least leaves
       unchanged) the value.
    2. base[i] < alternate[i], so that shrinking the draw always results in
       shrinking the chosen element.
    """

    def __init__(self, weights):
        n = len(weights)
        self.table = [[i, None, None] for i in range(n)]

        total = sum(weights)
        num_type = type(total)
        zero = num_type(0)
        one = num_type(1)

        small = []
        large = []

        probabilities = [w / total for w in weights]
        scaled_probabilities = []

        for i, p in enumerate(probabilities):
            scaled = p * n
            scaled_probabilities.append(scaled)
            if scaled == 1:
                self.table[i][2] = zero
            elif scaled < 1:
                small.append(i)
            else:
                large.append(i)

        heapq.heapify(small)
        heapq.heapify(large)

        while small and large:
            lo = heapq.heappop(small)
            hi = heapq.heappop(large)

            assert lo != hi
            assert scaled_probabilities[hi] > one
            assert self.table[lo][1] is None

            self.table[lo][1] = hi
            self.table[lo][2] = one - scaled_probabilities[lo]
            scaled_probabilities[hi] = (
                scaled_probabilities[hi] + scaled_probabilities[lo]
            ) - one

            if scaled_probabilities[hi] < 1:
                heapq.heappush(small, hi)
            elif scaled_probabilities[hi] == 1:
                self.table[hi][2] = zero
            else:
                heapq.heappush(large, hi)

        # Any indices left over are due to floating-point rounding; their
        # alternate value is never chosen.
        while large:
            self.table[large.pop()][2] = zero
        while small:
            self.table[small.pop()][2] = zero

        for entry in self.table:
            assert entry[2] is not None
            if entry[1] is None:
                entry[1] = entry[0]
            elif entry[1] < entry[0]:
                entry[0], entry[1] = entry[1], entry[0]
                entry[2] = one - entry[2]

        self.table.sort()

    def sample(self, data):
        data.start_example(SAMPLE_IN_SAMPLER_LABEL)
        i = integer_range(data, 0, len(self.table) - 1)
        base, alternate, alternate_chance = self.table[i]
        use_alternate = biased_coin(data, alternate_chance)
        data.stop_example()
        if use_alternate:
            return alternate
        else:
            return base

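# Illustrative sketch (not part of the library): for weights (1, 2, 1) the
# scaled probabilities are (0.75, 1.5, 0.75), so indices 0 and 2 start in
# `small` and index 1 in `large`.  Pairing them off yields the sorted table
#
#     [[0, 1, 0.25], [1, 1, 0.0], [1, 2, 0.75]]
#
# Each row is picked with probability 1/3 and its alternate with the listed
# chance, giving P(0) = 0.25, P(1) = 0.5 and P(2) = 0.25, i.e. weights 1:2:1.
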
INT_SIZES = (8, 16, 32, 64, 128)
INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5))
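# Illustrative note (not part of the library): the weights above sum to 14.5,
# so a sampled size is 16 bits about 55% of the time, 8 bits about 28%,
# 32 and 64 bits about 7% each, and 128 bits about 3% of the time.
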
class many:
    """Utility class for collections. Bundles up the logic we use for "should I
    keep drawing more values?" and handles starting and stopping examples in
    the right place.

    Intended usage is something like:

        elements = many(data, ...)
        while elements.more():
            add_stuff_to_result()
    """

    def __init__(self, data, min_size, max_size, average_size):
        assert 0 <= min_size <= average_size <= max_size
        self.min_size = min_size
        self.max_size = max_size
        self.data = data
        self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size)
        self.count = 0
        self.rejections = 0
        self.drawn = False
        self.force_stop = False
        self.rejected = False

    def more(self):
        """Should I draw another element to add to the collection?"""
        if self.drawn:
            self.data.stop_example(discard=self.rejected)

        self.drawn = True
        self.rejected = False

        self.data.start_example(ONE_FROM_MANY_LABEL)
        if self.min_size == self.max_size:
            # If we have to hit an exact size, draw unconditionally until that
            # point and then stop.
            should_continue = self.count < self.min_size
        else:
            forced_result = None
            if self.force_stop:
                forced_result = False
            elif self.count < self.min_size:
                forced_result = True
            elif self.count >= self.max_size:
                forced_result = False
            should_continue = biased_coin(
                self.data, self.p_continue, forced=forced_result
            )

        if should_continue:
            self.count += 1
            return True
        else:
            self.data.stop_example()
            return False

    def reject(self):
        """Reject the last example (i.e. don't count it towards our budget of
        elements because it's not going to go in the final collection)."""
        assert self.count > 0
        self.count -= 1
        self.rejections += 1
        self.rejected = True
        # We set a minimum number of rejections before we give up to avoid
        # failing too fast when we reject the first draw.
        if self.rejections > max(3, 2 * self.count):
            if self.count < self.min_size:
                self.data.mark_invalid()
            else:
                self.force_stop = True

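# Illustrative sketch (not part of the library): a strategy drawing a
# collection of distinct elements might combine more() and reject() roughly
# like this, where `draw_element` and `seen` stand in for the caller's own
# drawing logic and bookkeeping:
#
#     elements = many(data, min_size=1, max_size=10, average_size=5)
#     result = []
#     while elements.more():
#         value = draw_element()
#         if value in seen:
#             elements.reject()  # duplicate: doesn't count towards the size
#         else:
#             seen.add(value)
#             result.append(value)
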
@lru_cache()
def _calc_p_continue(desired_avg, max_size):
    """Return the p_continue which will generate the desired average size."""
    if desired_avg == max_size:
        return 1.0
    p_continue = 1 - 1.0 / (1 + desired_avg)
    if p_continue == 0 or max_size == float("inf"):
        assert 0 <= p_continue < 1, p_continue
        return p_continue
    # For small max_size, the infinite-series p_continue is a poor approximation,
    # and while we can't solve the polynomial, a few rounds of iteration quickly
    # get us a good approximate solution in almost all cases (sometimes exact!).
    while _p_continue_to_avg(p_continue, max_size) > desired_avg:
        # This is impossible over the reals, but *can* happen with floats.
        p_continue -= 0.0001
    # Let's binary-search our way to a better estimate! We tried fancier options
    # like gradient descent, but this is numerically stable and works better.
    hi = 1.0
    while desired_avg - _p_continue_to_avg(p_continue, max_size) > 0.01:
        assert p_continue < hi
        mid = (p_continue + hi) / 2
        if _p_continue_to_avg(mid, max_size) <= desired_avg:
            p_continue = mid
        else:
            hi = mid
    assert 0 < p_continue < 1
    assert _p_continue_to_avg(p_continue, max_size) <= desired_avg
    return p_continue

def _p_continue_to_avg(p_continue, max_size):
    """Return the average_size generated by this p_continue and max_size."""
    if p_continue >= 1:
        return max_size
    return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)

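# Illustrative sketch (not part of the library): with no size cap, a continue
# probability p = 1 - 1 / (1 + avg) gives a geometric mean length of exactly
# avg, e.g. p = 5/6 for an average of 5.  With max_size = 10 that same p only
# yields (1 / (1 - 5/6) - 1) * (1 - (5/6) ** 10) ~= 4.19 because long runs are
# truncated, so _calc_p_continue binary-searches p upwards between 5/6 and 1.0
# until the average is within 0.01 of the requested 5.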