Auto merge of #138881 - scottmcm:more-chaining-ord, r=Mark-Simulacrum

Use the chaining methods on PartialOrd for slices too

#138135 added these doc-hidden trait methods to improve the tuple codegen. This PR adds more implementations and callers so that the codegen for slice (and array) comparisons improves as well.
This commit is contained in:
bors 2025-04-13 11:47:23 +00:00
commit 53d4476111
4 changed files with 262 additions and 32 deletions

View file

@ -2053,6 +2053,22 @@ mod impls {
fn ge(&self, other: &&B) -> bool {
PartialOrd::ge(*self, *other)
}
#[inline]
fn __chaining_lt(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_lt(*self, *other)
}
#[inline]
fn __chaining_le(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_le(*self, *other)
}
#[inline]
fn __chaining_gt(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_gt(*self, *other)
}
#[inline]
fn __chaining_ge(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_ge(*self, *other)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: ?Sized> Ord for &A
@ -2108,6 +2124,22 @@ mod impls {
fn ge(&self, other: &&mut B) -> bool {
PartialOrd::ge(*self, *other)
}
#[inline]
fn __chaining_lt(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_lt(*self, *other)
}
#[inline]
fn __chaining_le(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_le(*self, *other)
}
#[inline]
fn __chaining_gt(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_gt(*self, *other)
}
#[inline]
fn __chaining_ge(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_ge(*self, *other)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: ?Sized> Ord for &mut A

View file

@ -5,6 +5,7 @@ use crate::ascii;
use crate::cmp::{self, BytewiseEq, Ordering};
use crate::intrinsics::compare_bytes;
use crate::num::NonZero;
use crate::ops::ControlFlow;
#[stable(feature = "rust1", since = "1.0.0")]
impl<T, U> PartialEq<[U]> for [T]
@ -31,12 +32,64 @@ impl<T: Ord> Ord for [T] {
}
}
#[inline]
fn as_underlying(x: ControlFlow<bool>) -> u8 {
// SAFETY: This will only compile if `bool` and `ControlFlow<bool>` have the same
// size (which isn't guaranteed but this is libcore). Because they have the same
// size, it's a niched implementation, which in one byte means there can't be
// any uninitialized memory. The callers then only check for `0` or `1` from this,
// which must necessarily match the `Break` variant, and we're fine no matter
// what ends up getting picked as the value representing `Continue(())`.
unsafe { crate::mem::transmute(x) }
}
/// Implements comparison of slices [lexicographically](Ord#lexicographical-comparison).
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: PartialOrd> PartialOrd for [T] {
#[inline]
fn partial_cmp(&self, other: &[T]) -> Option<Ordering> {
SlicePartialOrd::partial_compare(self, other)
}
#[inline]
fn lt(&self, other: &Self) -> bool {
// This is certainly not the obvious way to implement these methods.
// Unfortunately, using anything that looks at the discriminant means that
// LLVM sees a check for `2` (aka `ControlFlow<bool>::Continue(())`) and
// gets very distracted by that, ending up generating extraneous code.
// This should be changed to something simpler once either LLVM is smarter,
// see <https://github.com/llvm/llvm-project/issues/132678>, or we generate
// niche discriminant checks in a way that doesn't trigger it.
as_underlying(self.__chaining_lt(other)) == 1
}
#[inline]
fn le(&self, other: &Self) -> bool {
as_underlying(self.__chaining_le(other)) != 0
}
#[inline]
fn gt(&self, other: &Self) -> bool {
as_underlying(self.__chaining_gt(other)) == 1
}
#[inline]
fn ge(&self, other: &Self) -> bool {
as_underlying(self.__chaining_ge(other)) != 0
}
#[inline]
fn __chaining_lt(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_lt(self, other)
}
#[inline]
fn __chaining_le(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_le(self, other)
}
#[inline]
fn __chaining_gt(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_gt(self, other)
}
#[inline]
fn __chaining_ge(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_ge(self, other)
}
}
#[doc(hidden)]
@ -99,26 +152,65 @@ trait SlicePartialOrd: Sized {
fn partial_compare(left: &[Self], right: &[Self]) -> Option<Ordering>;
}
#[doc(hidden)]
// intermediate trait for specialization of slice's PartialOrd chaining methods
trait SliceChain: Sized {
fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
}
type AlwaysBreak<B> = ControlFlow<B, crate::convert::Infallible>;
impl<A: PartialOrd> SlicePartialOrd for A {
    // Generic fallback: compare elements pairwise via the shared chaining
    // driver, then compare lengths. (The diff rendering had merged the old
    // index-loop body with this one, producing invalid Rust; this is the
    // post-commit body.)
    default fn partial_compare(left: &[A], right: &[A]) -> Option<Ordering> {
        // `Continue` while elements compare equal; `Break` with the first
        // non-equal result.
        let elem_chain = |a, b| match PartialOrd::partial_cmp(a, b) {
            Some(Ordering::Equal) => ControlFlow::Continue(()),
            non_eq => ControlFlow::Break(non_eq),
        };
        // The length comparison always `Break`s, so the `AlwaysBreak`
        // pattern below is irrefutable.
        let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::partial_cmp(a, b));
        let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain);
        b
    }
}
impl<A: PartialOrd> SliceChain for A {
default fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_lt, usize::__chaining_lt)
}
default fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_le, usize::__chaining_le)
}
default fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_gt, usize::__chaining_gt)
}
default fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_ge, usize::__chaining_ge)
}
}
#[inline]
fn chaining_impl<'l, 'r, A: PartialOrd, B, C>(
left: &'l [A],
right: &'r [A],
elem_chain: impl Fn(&'l A, &'r A) -> ControlFlow<B>,
len_chain: impl for<'a> FnOnce(&'a usize, &'a usize) -> ControlFlow<B, C>,
) -> ControlFlow<B, C> {
let l = cmp::min(left.len(), right.len());
// Slice to the loop iteration range to enable bound check
// elimination in the compiler
let lhs = &left[..l];
let rhs = &right[..l];
for i in 0..l {
elem_chain(&lhs[i], &rhs[i])?;
}
len_chain(&left.len(), &right.len())
}
// This is the impl that we would like to have. Unfortunately it's not sound.
// See `partial_ord_slice.rs`.
/*
@ -165,21 +257,13 @@ trait SliceOrd: Sized {
impl<A: Ord> SliceOrd for A {
    // Generic fallback for total ordering, expressed through the same
    // chaining driver as `partial_compare`. (The diff rendering had merged
    // the removed index-loop body with this one, producing invalid Rust;
    // this is the post-commit body.)
    default fn compare(left: &[Self], right: &[Self]) -> Ordering {
        // `Continue` while elements compare equal; `Break` with the first
        // non-equal `Ordering`.
        let elem_chain = |a, b| match Ord::cmp(a, b) {
            Ordering::Equal => ControlFlow::Continue(()),
            non_eq => ControlFlow::Break(non_eq),
        };
        // The length comparison always `Break`s, making the `AlwaysBreak`
        // pattern irrefutable.
        let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::cmp(a, b));
        let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain);
        b
    }
}
@ -191,7 +275,7 @@ impl<A: Ord> SliceOrd for A {
/// * For every `x` and `y` of this type, `Ord(x, y)` must return the same
/// value as `Ord::cmp(transmute::<_, u8>(x), transmute::<_, u8>(y))`.
#[rustc_specialization_trait]
// The rendered diff kept both the pre-change and post-change declaration,
// which would be a duplicate definition; only the post-change one belongs.
// The `Ord` supertrait lets the specialized impls below rely on `Ord`-based
// helpers such as `SliceOrd::compare`.
unsafe trait UnsignedBytewiseOrd: Ord {}
unsafe impl UnsignedBytewiseOrd for bool {}
unsafe impl UnsignedBytewiseOrd for u8 {}
@ -225,6 +309,38 @@ impl<A: Ord + UnsignedBytewiseOrd> SliceOrd for A {
}
}
// Don't generate our own chaining loops for `memcmp`-able things either.
// Specialized chaining for element types whose ordering matches their byte
// representation: do a single `SliceOrd::compare` over both slices, then map
// the resulting `Ordering` into the chaining protocol — `Continue` on equal,
// otherwise `Break` with the requested relation applied to the ordering.
impl<A: PartialOrd + UnsignedBytewiseOrd> SliceChain for A {
    #[inline]
    fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
        let ord = SliceOrd::compare(left, right);
        if ord == Ordering::Equal {
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(ord.is_lt())
        }
    }
    #[inline]
    fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
        let ord = SliceOrd::compare(left, right);
        if ord == Ordering::Equal {
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(ord.is_le())
        }
    }
    #[inline]
    fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
        let ord = SliceOrd::compare(left, right);
        if ord == Ordering::Equal {
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(ord.is_gt())
        }
    }
    #[inline]
    fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
        let ord = SliceOrd::compare(left, right);
        if ord == Ordering::Equal {
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(ord.is_ge())
        }
    }
}
pub(super) trait SliceContains: Sized {
fn slice_contains(&self, x: &[Self]) -> bool;
}

View file

@ -2,7 +2,7 @@
use crate::cmp::Ordering::{self, *};
use crate::marker::{ConstParamTy_, StructuralPartialEq, UnsizedConstParamTy};
// The rendered diff kept both the pre-change and post-change `use` line,
// which would duplicate the `Break`/`Continue` imports; only the post-change
// line (which additionally imports the `ControlFlow` type itself) belongs.
use crate::ops::ControlFlow::{self, Break, Continue};
// Recursive macro for implementing n-ary tuple functions and operations
//
@ -95,6 +95,22 @@ macro_rules! tuple_impls {
fn gt(&self, other: &($($T,)+)) -> bool {
lexical_ord!(gt, __chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_lt(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_lt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_le(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_le, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_gt(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_ge(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_ge, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
}
}
@ -187,6 +203,17 @@ macro_rules! lexical_ord {
};
}
// Same parameter interleaving as `lexical_ord` above
macro_rules! lexical_chain {
    // Base case: every pair compared equal, so the chain as a whole defers.
    ($chain_rel: ident) => {
        Continue(())
    };
    // Recursive case: apply the chaining relation to the head pair; `?`
    // propagates a `Break` answer immediately, otherwise recurse on the
    // remaining interleaved pairs.
    ($chain_rel: ident, $a:expr, $b:expr $(,$rest_a:expr, $rest_b:expr)*) => {{
        PartialOrd::$chain_rel(&$a, &$b)?;
        lexical_chain!($chain_rel $(,$rest_a, $rest_b)*)
    }};
}
macro_rules! lexical_partial_cmp {
($a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {
match ($a).partial_cmp(&$b) {

View file

@ -1,6 +1,7 @@
// Ensure the asm for array comparisons is properly optimized.
//@ compile-flags: -C opt-level=2
//@ needs-deterministic-layouts (checks depend on tuple layout)
#![crate_type = "lib"]
@ -17,3 +18,57 @@ pub fn compare() -> bool {
[0x00, 0x00, 0x48, 0x41]
}
}
// CHECK-LABEL: @array_of_tuple_le
#[no_mangle]
pub fn array_of_tuple_le(a: &[(i16, u16); 2], b: &[(i16, u16); 2]) -> bool {
// Ensure that, after all the optimizations have run, the happy path just checks
// `eq` on each corresponding pair and moves onto the next one if it is.
// Then there's a dedup'd comparison for the place that's different.
// (As opposed to, say, running a full `[su]cmp` as part of checking equality.)
// This is written quite specifically because different library code was triggering
// <https://github.com/llvm/llvm-project/issues/132678> along the way, so this
// has enough checks to make sure that's not happening. It doesn't need to be
// *exactly* this IR, but be careful if you ever need to update these checks.
// CHECK: start:
// CHECK: %[[A00:.+]] = load i16, ptr %a
// CHECK: %[[B00:.+]] = load i16, ptr %b
// CHECK-NOT: cmp
// CHECK: %[[EQ00:.+]] = icmp eq i16 %[[A00]], %[[B00]]
// CHECK-NEXT: br i1 %[[EQ00]], label %[[L01:.+]], label %[[EXIT_S:.+]]
// CHECK: [[L01]]:
// CHECK: %[[PA01:.+]] = getelementptr{{.+}}i8, ptr %a, {{i32|i64}} 2
// CHECK: %[[PB01:.+]] = getelementptr{{.+}}i8, ptr %b, {{i32|i64}} 2
// CHECK: %[[A01:.+]] = load i16, ptr %[[PA01]]
// CHECK: %[[B01:.+]] = load i16, ptr %[[PB01]]
// CHECK-NOT: cmp
// CHECK: %[[EQ01:.+]] = icmp eq i16 %[[A01]], %[[B01]]
// CHECK-NEXT: br i1 %[[EQ01]], label %[[L10:.+]], label %[[EXIT_U:.+]]
// CHECK: [[L10]]:
// CHECK: %[[PA10:.+]] = getelementptr{{.+}}i8, ptr %a, {{i32|i64}} 4
// CHECK: %[[PB10:.+]] = getelementptr{{.+}}i8, ptr %b, {{i32|i64}} 4
// CHECK: %[[A10:.+]] = load i16, ptr %[[PA10]]
// CHECK: %[[B10:.+]] = load i16, ptr %[[PB10]]
// CHECK-NOT: cmp
// CHECK: %[[EQ10:.+]] = icmp eq i16 %[[A10]], %[[B10]]
// CHECK-NEXT: br i1 %[[EQ10]], label %[[L11:.+]], label %[[EXIT_S]]
// CHECK: [[L11]]:
// CHECK: %[[PA11:.+]] = getelementptr{{.+}}i8, ptr %a, {{i32|i64}} 6
// CHECK: %[[PB11:.+]] = getelementptr{{.+}}i8, ptr %b, {{i32|i64}} 6
// CHECK: %[[A11:.+]] = load i16, ptr %[[PA11]]
// CHECK: %[[B11:.+]] = load i16, ptr %[[PB11]]
// CHECK-NOT: cmp
// CHECK: %[[EQ11:.+]] = icmp eq i16 %[[A11]], %[[B11]]
// CHECK-NEXT: br i1 %[[EQ11]], label %[[DONE:.+]], label %[[EXIT_U]]
// CHECK: [[DONE]]:
// CHECK: %[[RET:.+]] = phi i1 [ %{{.+}}, %[[EXIT_S]] ], [ %{{.+}}, %[[EXIT_U]] ], [ true, %[[L11]] ]
// CHECK: ret i1 %[[RET]]
a <= b
}