Commit 12075f0
committed
Auto merge of rust-lang#123878 - jwong101:inplacecollect, r=jhpratt
optimize inplace collection of Vec
This PR has the following changes:
1. Using `usize::unchecked_mul` in https://github.com/rust-lang/rust/blob/79424056b05eaa9563d16dfab9b9a0c8f033f220/library/alloc/src/vec/in_place_collect.rs#L262 as LLVM, does not know that the operation can't wrap, since that's the size of the original allocation.
Given the following:
```rust
pub struct Foo([usize; 3]);
pub fn unwrap_copy(v: Vec<Foo>) -> Vec<[usize; 3]> {
v.into_iter().map(|f| f.0).collect()
}
```
<details>
<summary>Before this commit:</summary>
```llvm
define void `@unwrap_copy(ptr` noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
%me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
%me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
%me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
%me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
%me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
%_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
%0 = udiv i64 %_19.i.idx, 24
; Unnecessary calculation
%_16.i.i = mul i64 %me.sroa.0.0.copyload.i, 24
%dst_cap.i.i = udiv i64 %_16.i.i, 24
store i64 %dst_cap.i.i, ptr %_0, align 8
%1 = getelementptr inbounds i8, ptr %_0, i64 8
store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
%2 = getelementptr inbounds i8, ptr %_0, i64 16
store i64 %0, ptr %2, align 8
ret void
}
```
</details>
<details>
<summary>After:</summary>
```llvm
define void `@unwrap_copy(ptr` noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
%me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
%me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
%me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
%me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
%me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
%_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
%0 = udiv i64 %_19.i.idx, 24
store i64 %me.sroa.0.0.copyload.i, ptr %_0, align 8
%1 = getelementptr inbounds i8, ptr %_0, i64 8
store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
%2 = getelementptr inbounds i8, ptr %_0, i64 16
store i64 %0, ptr %2, align 8, !alias.scope !9, !noalias !14
ret void
}
```
</details>
Note that there is still one more `mul,udiv` pair that I couldn't get
rid of. The root cause is the same issue as rust-lang#121239, the `nuw` gets
stripped off of `ptr::sub_ptr`.
2.
`Iterator::try_fold` gets called on the underlying Iterator in
`SpecInPlaceCollect::collect_in_place` whenever it does not implement
`TrustedRandomAccess`. For types that impl `Drop`, LLVM currently can't
tell that the drop can never occur, when using the default
`Iterator::try_fold` implementation.
For example, given the following code from rust-lang#120493
```rust
#[repr(transparent)]
struct WrappedClone {
inner: String
}
#[no_mangle]
pub fn unwrap_clone(list: Vec<WrappedClone>) -> Vec<String> {
list.into_iter().map(|s| s.inner).collect()
}
```
<details>
<summary>The asm for the `unwrap_clone` method is currently:</summary>
```asm
unwrap_clone:
push rbp
push r15
push r14
push r13
push r12
push rbx
push rax
mov rbx, rdi
mov r12, qword ptr [rsi]
mov rdi, qword ptr [rsi + 8]
mov rax, qword ptr [rsi + 16]
movabs rsi, -6148914691236517205
mov r14, r12
test rax, rax
je .LBB0_10
lea rcx, [rax + 2*rax]
lea r14, [r12 + 8*rcx]
shl rax, 3
lea rax, [rax + 2*rax]
xor ecx, ecx
.LBB0_2:
cmp qword ptr [r12 + rcx], 0
je .LBB0_4
add rcx, 24
cmp rax, rcx
jne .LBB0_2
jmp .LBB0_10
.LBB0_4:
lea rdx, [rax - 24]
lea r14, [r12 + rcx]
cmp rdx, rcx
je .LBB0_10
mov qword ptr [rsp], rdi
sub rax, rcx
add rax, -24
mul rsi
mov r15, rdx
lea rbp, [r12 + rcx]
add rbp, 32
shr r15, 4
mov r13, qword ptr [rip + __rust_dealloc@GOTPCREL]
jmp .LBB0_6
.LBB0_8:
add rbp, 24
dec r15
je .LBB0_9
.LBB0_6:
mov rsi, qword ptr [rbp]
test rsi, rsi
je .LBB0_8
mov rdi, qword ptr [rbp - 8]
mov edx, 1
call r13
jmp .LBB0_8
.LBB0_9:
mov rdi, qword ptr [rsp]
movabs rsi, -6148914691236517205
.LBB0_10:
sub r14, r12
mov rax, r14
mul rsi
shr rdx, 4
mov qword ptr [rbx], r12
mov qword ptr [rbx + 8], rdi
mov qword ptr [rbx + 16], rdx
mov rax, rbx
add rsp, 8
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
```
</details>
<details>
<summary>After this PR:</summary>
```asm
unwrap_clone:
mov rax, rdi
movups xmm0, xmmword ptr [rsi]
mov rcx, qword ptr [rsi + 16]
movups xmmword ptr [rdi], xmm0
mov qword ptr [rdi + 16], rcx
ret
```
</details>
Fixes rust-lang#1204933 files changed
+58
-2
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
160 | 160 | | |
161 | 161 | | |
162 | 162 | | |
| 163 | + | |
163 | 164 | | |
164 | 165 | | |
165 | 166 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
259 | 259 | | |
260 | 260 | | |
261 | 261 | | |
262 | | - | |
| 262 | + | |
| 263 | + | |
263 | 264 | | |
264 | 265 | | |
265 | 266 | | |
| |||
374 | 375 | | |
375 | 376 | | |
376 | 377 | | |
377 | | - | |
| 378 | + | |
378 | 379 | | |
379 | 380 | | |
380 | 381 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
289 | 289 | | |
290 | 290 | | |
291 | 291 | | |
| 292 | + | |
| 293 | + | |
| 294 | + | |
| 295 | + | |
| 296 | + | |
| 297 | + | |
| 298 | + | |
| 299 | + | |
| 300 | + | |
| 301 | + | |
| 302 | + | |
| 303 | + | |
| 304 | + | |
| 305 | + | |
| 306 | + | |
| 307 | + | |
| 308 | + | |
| 309 | + | |
| 310 | + | |
| 311 | + | |
| 312 | + | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
| 319 | + | |
| 320 | + | |
| 321 | + | |
| 322 | + | |
| 323 | + | |
| 324 | + | |
| 325 | + | |
| 326 | + | |
| 327 | + | |
| 328 | + | |
| 329 | + | |
| 330 | + | |
| 331 | + | |
| 332 | + | |
| 333 | + | |
| 334 | + | |
| 335 | + | |
| 336 | + | |
| 337 | + | |
| 338 | + | |
| 339 | + | |
| 340 | + | |
| 341 | + | |
| 342 | + | |
| 343 | + | |
| 344 | + | |
| 345 | + | |
292 | 346 | | |
293 | 347 | | |
294 | 348 | | |
| |||
0 commit comments