1
Fork 0

Rework slice splitting api

This commit is contained in:
Nadrieril 2023-10-03 14:38:40 +02:00
parent 8f9cd3d1e8
commit 590edee320

View file

@ -40,7 +40,7 @@
//! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for //! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for
//! or-patterns; instead we just try the alternatives one-by-one. For details on splitting //! or-patterns; instead we just try the alternatives one-by-one. For details on splitting
//! wildcards, see [`SplitWildcard`]; for integer ranges, see [`IntRange::split`]; for slices, see //! wildcards, see [`SplitWildcard`]; for integer ranges, see [`IntRange::split`]; for slices, see
//! [`SplitVarLenSlice`]. //! [`Slice::split`].
use std::cell::Cell; use std::cell::Cell;
use std::cmp::{self, max, min, Ordering}; use std::cmp::{self, max, min, Ordering};
@ -410,141 +410,130 @@ impl Slice {
fn is_covered_by(self, other: Self) -> bool { fn is_covered_by(self, other: Self) -> bool {
other.kind.covers_length(self.arity()) other.kind.covers_length(self.arity())
} }
}
/// This computes constructor splitting for variable-length slices, as explained at the top of the /// This computes constructor splitting for variable-length slices, as explained at the top of
/// file. /// the file.
/// ///
/// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, _, /// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x,
/// _, y] | ...`. The corresponding value constructors are fixed-length array constructors above a /// _, _, y] | etc`. The corresponding value constructors are fixed-length array constructors of
/// given minimum length. We obviously can't list this infinitude of constructors. Thankfully, /// corresponding lengths. We obviously can't list this infinitude of constructors.
/// it turns out that for each finite set of slice patterns, all sufficiently large array lengths /// Thankfully, it turns out that for each finite set of slice patterns, all sufficiently large
/// are equivalent. /// array lengths are equivalent.
/// ///
/// Let's look at an example, where we are trying to split the last pattern: /// Let's look at an example, where we are trying to split the last pattern:
/// ``` /// ```
/// # fn foo(x: &[bool]) { /// # fn foo(x: &[bool]) {
/// match x { /// match x {
/// [true, true, ..] => {} /// [true, true, ..] => {}
/// [.., false, false] => {} /// [.., false, false] => {}
/// [..] => {} /// [..] => {} // `self`
/// } /// }
/// # } /// # }
/// ``` /// ```
/// Here are the results of specialization for the first few lengths: /// Here are the results of specialization for the first few lengths:
/// ``` /// ```
/// # fn foo(x: &[bool]) { match x { /// # fn foo(x: &[bool]) { match x {
/// // length 0 /// // length 0
/// [] => {} /// [] => {}
/// // length 1 /// // length 1
/// [_] => {} /// [_] => {}
/// // length 2 /// // length 2
/// [true, true] => {} /// [true, true] => {}
/// [false, false] => {} /// [false, false] => {}
/// [_, _] => {} /// [_, _] => {}
/// // length 3 /// // length 3
/// [true, true, _ ] => {} /// [true, true, _ ] => {}
/// [_, false, false] => {} /// [_, false, false] => {}
/// [_, _, _ ] => {} /// [_, _, _ ] => {}
/// // length 4 /// // length 4
/// [true, true, _, _ ] => {} /// [true, true, _, _ ] => {}
/// [_, _, false, false] => {} /// [_, _, false, false] => {}
/// [_, _, _, _ ] => {} /// [_, _, _, _ ] => {}
/// // length 5 /// // length 5
/// [true, true, _, _, _ ] => {} /// [true, true, _, _, _ ] => {}
/// [_, _, _, false, false] => {} /// [_, _, _, false, false] => {}
/// [_, _, _, _, _ ] => {} /// [_, _, _, _, _ ] => {}
/// # _ => {} /// # _ => {}
/// # }} /// # }}
/// ``` /// ```
/// ///
/// If we went above length 5, we would simply be inserting more columns full of wildcards in the /// We see that above length 4, we are simply inserting columns full of wildcards in the middle.
/// middle. This means that the set of witnesses for length `l >= 5` is equivalent to the set for /// This means that specialization and witness computation with slices of length `l >= 4` will
/// any other `l' >= 5`: simply add or remove wildcards in the middle to convert between them. /// give equivalent results independently of `l`. This applies to any set of slice patterns:
/// /// there will be a length `L` above which all lengths behave the same. This is exactly what we
/// This applies to any set of slice patterns: there will be a length `L` above which all lengths /// need for constructor splitting.
/// behave the same. This is exactly what we need for constructor splitting. Therefore a ///
/// variable-length slice can be split into a variable-length slice of minimal length `L`, and many /// A variable-length slice pattern covers all lengths from its arity up to infinity. As we just
/// fixed-length slices of lengths `< L`. /// saw, we can split this in two: lengths below `L` are treated individually with a
/// /// fixed-length slice each; lengths above `L` are grouped into a single variable-length slice
/// For each variable-length pattern `p` with a prefix of length `plₚ` and suffix of length `slₚ`, /// constructor.
/// only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as long as `L` is ///
/// positive (to avoid concerns about empty types), all elements after the maximum prefix length /// For each variable-length slice pattern `p` with a prefix of length `plₚ` and suffix of
/// and before the maximum suffix length are not examined by any variable-length pattern, and /// length `slₚ`, only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as
/// therefore can be added/removed without affecting them - creating equivalent patterns from any /// long as `L` is positive (to avoid concerns about empty types), all elements after the
/// sufficiently-large length. /// maximum prefix length and before the maximum suffix length are not examined by any
/// /// variable-length pattern, and therefore can be ignored. This gives us a way to compute `L`.
/// Of course, if fixed-length patterns exist, we must be sure that our length is large enough to ///
/// miss them all, so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` /// Additionally, if fixed-length patterns exist, we must pick an `L` large enough to miss them,
/// /// so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))`.
/// `max_slice` below will be made to have arity `L`. /// `max_slice` below will be made to have this arity `L`.
#[derive(Debug)] ///
struct SplitVarLenSlice { /// If `self` is fixed-length, it is returned as-is.
/// If the type is an array, this is its size. fn split(self, column_slices: impl Iterator<Item = Slice>) -> impl Iterator<Item = Slice> {
array_len: Option<usize>, // Range of lengths below `L`.
/// The arity of the input slice. let smaller_lengths;
arity: usize, let mut max_slice = self.kind;
/// The smallest slice bigger than any slice seen. `max_slice.arity()` is the length `L` match &mut max_slice {
/// described above. VarLen(max_prefix_len, max_suffix_len) => {
max_slice: SliceKind, // We grow `max_slice` to be larger than all slices encountered, as described above.
} // For diagnostics, we keep the prefix and suffix lengths separate, but grow them so that
// `L = max_prefix_len + max_suffix_len`.
impl SplitVarLenSlice { let mut max_fixed_len = 0;
fn new(prefix: usize, suffix: usize, array_len: Option<usize>) -> Self { for slice in column_slices {
SplitVarLenSlice { array_len, arity: prefix + suffix, max_slice: VarLen(prefix, suffix) } match slice.kind {
} FixedLen(len) => {
max_fixed_len = cmp::max(max_fixed_len, len);
/// Pass a set of slices relative to which to split this one. }
fn split(&mut self, slices: impl Iterator<Item = SliceKind>) { VarLen(prefix, suffix) => {
let VarLen(max_prefix_len, max_suffix_len) = &mut self.max_slice else { *max_prefix_len = cmp::max(*max_prefix_len, prefix);
// No need to split *max_suffix_len = cmp::max(*max_suffix_len, suffix);
return; }
}; }
// We grow `self.max_slice` to be larger than all slices encountered, as described above.
// For diagnostics, we keep the prefix and suffix lengths separate, but grow them so that
// `L = max_prefix_len + max_suffix_len`.
let mut max_fixed_len = 0;
for slice in slices {
match slice {
FixedLen(len) => {
max_fixed_len = cmp::max(max_fixed_len, len);
} }
VarLen(prefix, suffix) => { // We want `L = max(L, max_fixed_len + 1)`, modulo the fact that we keep prefix and
*max_prefix_len = cmp::max(*max_prefix_len, prefix); // suffix separate.
*max_suffix_len = cmp::max(*max_suffix_len, suffix); if max_fixed_len + 1 >= *max_prefix_len + *max_suffix_len {
// The subtraction can't overflow thanks to the above check.
// The new `max_prefix_len` is larger than its previous value.
*max_prefix_len = max_fixed_len + 1 - *max_suffix_len;
} }
// We cap the arity of `max_slice` at the array size.
match self.array_len {
Some(len) if max_slice.arity() >= len => max_slice = FixedLen(len),
_ => {}
}
smaller_lengths = match self.array_len {
// The only admissible fixed-length slice is one of the array size. Whether `max_slice`
// is fixed-length or variable-length, it will be the only relevant slice to output
// here.
Some(_) => 0..0, // empty range
// We need to cover all arities in the range `(arity..infinity)`. We split that
// range into two: lengths smaller than `max_slice.arity()` are treated
// independently as fixed-length slices, and lengths above are captured by
// `max_slice`.
None => self.arity()..max_slice.arity(),
};
}
FixedLen(_) => {
// No need to split.
smaller_lengths = 0..0;
} }
}
// We want `L = max(L, max_fixed_len + 1)`, modulo the fact that we keep prefix and
// suffix separate.
if max_fixed_len + 1 >= *max_prefix_len + *max_suffix_len {
// The subtraction can't overflow thanks to the above check.
// The new `max_prefix_len` is larger than its previous value.
*max_prefix_len = max_fixed_len + 1 - *max_suffix_len;
}
// We cap the arity of `max_slice` at the array size.
match self.array_len {
Some(len) if self.max_slice.arity() >= len => self.max_slice = FixedLen(len),
_ => {}
}
}
/// Iterate over the partition of this slice.
fn iter(&self) -> impl Iterator<Item = Slice> + Captures<'_> {
let smaller_lengths = match self.array_len {
// The only admissible fixed-length slice is one of the array size. Whether `max_slice`
// is fixed-length or variable-length, it will be the only relevant slice to output
// here.
Some(_) => 0..0, // empty range
// We cover all arities in the range `(self.arity..infinity)`. We split that range into
// two: lengths smaller than `max_slice.arity()` are treated independently as
// fixed-length slices, and lengths above are captured by `max_slice`.
None => self.arity..self.max_slice.arity(),
}; };
smaller_lengths smaller_lengths
.map(FixedLen) .map(FixedLen)
.chain(once(self.max_slice)) .chain(once(max_slice))
.map(move |kind| Slice::new(self.array_len, kind)) .map(move |kind| Slice::new(self.array_len, kind))
} }
} }
@ -716,11 +705,9 @@ impl<'tcx> Constructor<'tcx> {
let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()).cloned(); let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()).cloned();
ctor_range.split(int_ranges).map(IntRange).collect() ctor_range.split(int_ranges).map(IntRange).collect()
} }
&Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { &Slice(slice @ Slice { kind: VarLen(..), .. }) => {
let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, array_len); let slices = ctors.filter_map(|c| c.as_slice());
let slices = ctors.filter_map(|c| c.as_slice()).map(|s| s.kind); slice.split(slices).map(Slice).collect()
split_self.split(slices);
split_self.iter().map(Slice).collect()
} }
// Any other constructor can be used unchanged. // Any other constructor can be used unchanged.
_ => smallvec![self.clone()], _ => smallvec![self.clone()],