1
Fork 0

auto merge of #13539 : Aatch/rust/vector-copy-faster, r=thestinger

LLVM wasn't recognising the element-by-element copy loops in vector `to_owned` and `clone` as memcpy loops and was therefore failing to optimise them properly. While improving LLVM is the "proper" way to fix this, I think that these vector-copy cases are important enough to warrant a little low-level optimisation.

Fixes #13472 

r? @thestinger 

---

Benchmark Results:

```
--- Before ---
test clone_owned          ... bench:   6126104 ns/iter (+/- 285962) = 170 MB/s
test clone_owned_to_owned ... bench:   6125054 ns/iter (+/- 271197) = 170 MB/s
test clone_str            ... bench:     80586 ns/iter (+/- 11489) = 13011 MB/s
test clone_vec            ... bench:   3903220 ns/iter (+/- 658556) = 268 MB/s
test test_memcpy          ... bench:     69401 ns/iter (+/- 2168) = 15108 MB/s

--- After ---
test clone_owned          ... bench:     70839 ns/iter (+/- 4931) = 14801 MB/s
test clone_owned_to_owned ... bench:     70286 ns/iter (+/- 4836) = 14918 MB/s
test clone_str            ... bench:     78519 ns/iter (+/- 5511) = 13353 MB/s
test clone_vec            ... bench:     71415 ns/iter (+/- 1999) = 14682 MB/s
test test_memcpy          ... bench:     70980 ns/iter (+/- 2126) = 14772 MB/s
```
This commit is contained in:
bors 2014-04-16 03:36:27 -07:00
commit f39ba69aaa
2 changed files with 38 additions and 5 deletions

View file

@ -760,9 +760,25 @@ impl<'a, T: Clone> CloneableVector<T> for &'a [T] {
/// Returns a copy of `v`.
#[inline]
fn to_owned(&self) -> ~[T] {
let mut result = with_capacity(self.len());
for e in self.iter() {
result.push((*e).clone());
let len = self.len();
let mut result = with_capacity(len);
// Unsafe code so this can be optimised to a memcpy (or something
// similarly fast) when T is Copy. LLVM is easily confused, so any
// extra operations during the loop can prevent this optimisation
unsafe {
let mut i = 0;
let p = result.as_mut_ptr();
// Use try_finally here; otherwise, the write to length
// inside the loop stops LLVM from optimising this.
try_finally(
&mut i, (),
|i, ()| while *i < len {
mem::move_val_init(
&mut(*p.offset(*i as int)),
self.unsafe_ref(*i).clone());
*i += 1;
},
|i| result.set_len(*i));
}
result
}
@ -2584,7 +2600,8 @@ pub mod bytes {
impl<A: Clone> Clone for ~[A] {
#[inline]
fn clone(&self) -> ~[A] {
self.iter().map(|item| item.clone()).collect()
// Use the fast to_owned on &[A] for cloning
self.as_slice().to_owned()
}
fn clone_from(&mut self, source: &~[A]) {

View file

@ -311,7 +311,23 @@ impl<T: Clone> Vec<T> {
impl<T:Clone> Clone for Vec<T> {
fn clone(&self) -> Vec<T> {
self.iter().map(|x| x.clone()).collect()
let len = self.len;
let mut vector = Vec::with_capacity(len);
// Unsafe code so this can be optimised to a memcpy (or something
// similarly fast) when T is Copy. LLVM is easily confused, so any
// extra operations during the loop can prevent this optimisation
{
let this_slice = self.as_slice();
while vector.len < len {
unsafe {
mem::move_val_init(
vector.as_mut_slice().unsafe_mut_ref(vector.len),
this_slice.unsafe_ref(vector.len).clone());
}
vector.len += 1;
}
}
vector
}
fn clone_from(&mut self, other: &Vec<T>) {