auto merge of #13539 : Aatch/rust/vector-copy-faster, r=thestinger
LLVM wasn't recognising the loops as memcpy loops and was therefore failing to optimise them properly. While improving LLVM is the "proper" way to fix this, I think that these cases are important enough to warrant a little low-level optimisation. Fixes #13472

r? @thestinger

---

Benchmark Results:

```
--- Before ---
test clone_owned          ... bench: 6126104 ns/iter (+/- 285962) =   170 MB/s
test clone_owned_to_owned ... bench: 6125054 ns/iter (+/- 271197) =   170 MB/s
test clone_str            ... bench:   80586 ns/iter (+/-  11489) = 13011 MB/s
test clone_vec            ... bench: 3903220 ns/iter (+/- 658556) =   268 MB/s
test test_memcpy          ... bench:   69401 ns/iter (+/-   2168) = 15108 MB/s

--- After ---
test clone_owned          ... bench:   70839 ns/iter (+/-   4931) = 14801 MB/s
test clone_owned_to_owned ... bench:   70286 ns/iter (+/-   4836) = 14918 MB/s
test clone_str            ... bench:   78519 ns/iter (+/-   5511) = 13353 MB/s
test clone_vec            ... bench:   71415 ns/iter (+/-   1999) = 14682 MB/s
test test_memcpy          ... bench:   70980 ns/iter (+/-   2126) = 14772 MB/s
```
Commit: f39ba69aaa

2 changed files with 38 additions and 5 deletions
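To put the throughput numbers above in context, here is a minimal standalone timing harness in modern Rust. It is not the PR's actual `#[bench]` suite; the element type, vector size, iteration count, and use of `black_box` are illustrative choices only.

```rust
use std::hint::black_box;
use std::time::Instant;

fn main() {
    // About 8 MB of u64s; large enough that a bulk copy vs. a
    // per-element loop shows up clearly in the timing.
    let src: Vec<u64> = (0..1_000_000).collect();

    let start = Instant::now();
    for _ in 0..100 {
        // Vec::clone is the operation the PR speeds up; black_box keeps
        // the optimiser from discarding the unused copy.
        let copy = src.clone();
        black_box(&copy);
    }
    println!("100 clones of {} elements took {:?}", src.len(), start.elapsed());
}
```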
```diff
@@ -760,9 +760,25 @@ impl<'a, T: Clone> CloneableVector<T> for &'a [T] {
     /// Returns a copy of `v`.
     #[inline]
     fn to_owned(&self) -> ~[T] {
-        let mut result = with_capacity(self.len());
-        for e in self.iter() {
-            result.push((*e).clone());
+        let len = self.len();
+        let mut result = with_capacity(len);
+        // Unsafe code so this can be optimised to a memcpy (or something
+        // similarly fast) when T is Copy. LLVM is easily confused, so any
+        // extra operations during the loop can prevent this optimisation
+        unsafe {
+            let mut i = 0;
+            let p = result.as_mut_ptr();
+            // Use try_finally here otherwise the write to length
+            // inside the loop stops LLVM from optimising this.
+            try_finally(
+                &mut i, (),
+                |i, ()| while *i < len {
+                    mem::move_val_init(
+                        &mut(*p.offset(*i as int)),
+                        self.unsafe_ref(*i).clone());
+                    *i += 1;
+                },
+                |i| result.set_len(*i));
         }
         result
     }
```
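The hunk above writes the clones straight into uninitialised capacity and fixes up the length afterwards, with `try_finally` ensuring the length is still set correctly if a `clone()` call fails part-way through. Below is a condensed modern-Rust sketch of the same idea, using `spare_capacity_mut` instead of raw pointer offsets. The function name is mine, not the PR's, and unlike the PR this simplified version leaks already-written elements if a clone panics, since it only sets the length once at the end.

```rust
fn to_owned_fast<T: Clone>(src: &[T]) -> Vec<T> {
    let len = src.len();
    let mut result: Vec<T> = Vec::with_capacity(len);

    // Clone each element directly into the uninitialised capacity.
    let spare = result.spare_capacity_mut();
    for i in 0..len {
        spare[i].write(src[i].clone());
    }

    // Safety: the first `len` slots were just initialised above,
    // and `len` does not exceed the reserved capacity.
    unsafe { result.set_len(len) };
    result
}

fn main() {
    let owned = to_owned_fast(&[1u8, 2, 3]);
    assert_eq!(owned, vec![1, 2, 3]);
}
```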
```diff
@@ -2584,7 +2600,8 @@ pub mod bytes {
 impl<A: Clone> Clone for ~[A] {
     #[inline]
     fn clone(&self) -> ~[A] {
-        self.iter().map(|item| item.clone()).collect()
+        // Use the fast to_owned on &[A] for cloning
+        self.as_slice().to_owned()
     }

     fn clone_from(&mut self, source: &~[A]) {
```
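This second hunk is pure delegation: the owned vector's `Clone` reuses the optimised slice-level copy instead of cloning element by element. A small modern-Rust illustration of the same pattern with a hypothetical wrapper type (`MyVec` is not from the PR); `to_vec` plays the role the old `to_owned` plays in the diff.

```rust
struct MyVec<T>(Vec<T>);

impl<T: Clone> Clone for MyVec<T> {
    fn clone(&self) -> MyVec<T> {
        // Delegate to the slice-level copy, which is effectively a
        // single bulk copy for Copy element types, rather than looping
        // over elements here.
        MyVec(self.0.as_slice().to_vec())
    }
}

fn main() {
    let v = MyVec((0u32..4).collect());
    let w = v.clone();
    assert_eq!(w.0, vec![0, 1, 2, 3]);
}
```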
```diff
@@ -311,7 +311,23 @@ impl<T: Clone> Vec<T> {

 impl<T:Clone> Clone for Vec<T> {
     fn clone(&self) -> Vec<T> {
-        self.iter().map(|x| x.clone()).collect()
+        let len = self.len;
+        let mut vector = Vec::with_capacity(len);
+        // Unsafe code so this can be optimised to a memcpy (or something
+        // similarly fast) when T is Copy. LLVM is easily confused, so any
+        // extra operations during the loop can prevent this optimisation
+        {
+            let this_slice = self.as_slice();
+            while vector.len < len {
+                unsafe {
+                    mem::move_val_init(
+                        vector.as_mut_slice().unsafe_mut_ref(vector.len),
+                        this_slice.unsafe_ref(vector.len).clone());
+                }
+                vector.len += 1;
+            }
+        }
+        vector
     }

     fn clone_from(&mut self, other: &Vec<T>) {
```
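The `Vec<T>` version takes a slightly different route to panic safety than the slice version: it bumps `vector.len` only after each slot is initialised, so a failing `clone()` leaves a shorter but fully valid vector. Here is a modern-Rust sketch of that incremental pattern, offered as an illustration rather than the PR's code; the function name and signature are mine, and whether LLVM collapses the loop into a memcpy depends on the element type and compiler version.

```rust
use std::ptr;

fn clone_vec<T: Clone>(src: &[T]) -> Vec<T> {
    let len = src.len();
    let mut out: Vec<T> = Vec::with_capacity(len);
    while out.len() < len {
        let i = out.len();
        unsafe {
            // The clone is evaluated before the write; if it panics,
            // `out` still has length `i` and only initialised elements,
            // so it drops cleanly.
            ptr::write(out.as_mut_ptr().add(i), src[i].clone());
            // Advance the length only once slot `i` is initialised.
            out.set_len(i + 1);
        }
    }
    out
}

fn main() {
    let original = vec![String::from("a"), String::from("b")];
    let copy = clone_vec(&original);
    assert_eq!(copy, original);
}
```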