Skip to content

Commit fd4fa59

Browse files
committed
Faster SIMD approach
1 parent fe4cc13 commit fd4fa59

File tree

2 files changed

+50
-16
lines changed

2 files changed

+50
-16
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
277277
| 8 | [Memory Maneuver](https://adventofcode.com/2018/day/8) | [Source](src/year2018/day08.rs) | 24 |
278278
| 9 | [Marble Mania](https://adventofcode.com/2018/day/9) | [Source](src/year2018/day09.rs) | 909 |
279279
| 10 | [The Stars Align](https://adventofcode.com/2018/day/10) | [Source](src/year2018/day10.rs) | 11 |
280-
| 11 | [Chronal Charge](https://adventofcode.com/2018/day/11) | [Source](src/year2018/day11.rs) | 1193 |
280+
| 11 | [Chronal Charge](https://adventofcode.com/2018/day/11) | [Source](src/year2018/day11.rs) | 542 |
281281
| 12 | [Subterranean Sustainability](https://adventofcode.com/2018/day/12) | [Source](src/year2018/day12.rs) | 77 |
282282
| 13 | [Mine Cart Madness](https://adventofcode.com/2018/day/13) | [Source](src/year2018/day13.rs) | 349 |
283283
| 14 | [Chocolate Charts](https://adventofcode.com/2018/day/14) | [Source](src/year2018/day14.rs) | 24000 |

src/year2018/day11.rs

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@ use crate::util::parse::*;
99
use crate::util::thread::*;
1010

1111
pub struct Result {
12+
size: usize,
1213
x: usize,
1314
y: usize,
14-
size: usize,
1515
power: i32,
1616
}
1717

1818
pub fn parse(input: &str) -> Vec<Result> {
1919
let grid_serial_number: i32 = input.signed();
2020

21-
// Build Summed-area table.
22-
let mut sat = vec![0; 301 * 301];
21+
// Build Summed-area table. Add a little extra buffer to the end for the SIMD variant.
22+
let mut sat = vec![0; 301 * 301 + 32];
2323

2424
for y in 1..301 {
2525
for x in 1..301 {
@@ -39,7 +39,9 @@ pub fn parse(input: &str) -> Vec<Result> {
3939
// Use as many cores as possible to parallelize the search.
4040
// Smaller sizes take more time so use work stealing to keep all cores busy.
4141
let items: Vec<_> = (1..301).collect();
42-
let result = spawn_parallel_iterator(&items, |iter| worker(&sat, iter));
42+
let result = spawn_parallel_iterator(&items, |iter| {
43+
iter.map(|&size| square(&sat, size)).collect::<Vec<_>>()
44+
});
4345
result.into_iter().flatten().collect()
4446
}
4547

@@ -49,27 +51,21 @@ pub fn part1(input: &[Result]) -> String {
4951
}
5052

5153
pub fn part2(input: &[Result]) -> String {
52-
let Result { x, y, size, .. } = input.iter().max_by_key(|r| r.power).unwrap();
54+
let Result { size, x, y, .. } = input.iter().max_by_key(|r| r.power).unwrap();
5355
format!("{x},{y},{size}")
5456
}
5557

56-
fn worker(sat: &[i32], iter: ParIter<'_, usize>) -> Vec<Result> {
57-
iter.map(|&size| {
58-
let (power, x, y) = square(sat, size);
59-
Result { x, y, size, power }
60-
})
61-
.collect()
62-
}
63-
6458
/// Find the (x,y) coordinates and max power for a square of the specified size.
65-
fn square(sat: &[i32], size: usize) -> (i32, usize, usize) {
59+
#[cfg(not(feature = "simd"))]
60+
fn square(sat: &[i32], size: usize) -> Result {
6661
let mut max_power = i32::MIN;
6762
let mut max_x = 0;
6863
let mut max_y = 0;
6964

7065
for y in size..301 {
7166
for x in size..301 {
7267
let index = 301 * y + x;
68+
7369
let power =
7470
sat[index] - sat[index - size] - sat[index - 301 * size] + sat[index - 302 * size];
7571

@@ -81,5 +77,43 @@ fn square(sat: &[i32], size: usize) -> (i32, usize, usize) {
8177
}
8278
}
8379

84-
(max_power, max_x, max_y)
80+
Result { size, x: max_x, y: max_y, power: max_power }
81+
}
82+
83+
/// Same as the scalar version but processing 16 lanes simultaneously.
84+
#[cfg(feature = "simd")]
85+
fn square(sat: &[i32], size: usize) -> Result {
86+
use std::simd::cmp::SimdPartialOrd as _;
87+
use std::simd::*;
88+
89+
const LANE_WIDTH: usize = 16;
90+
type Vector = Simd<i32, LANE_WIDTH>;
91+
92+
let mut max_power = i32::MIN;
93+
let mut max_x = 0;
94+
let mut max_y = 0;
95+
96+
for y in size..301 {
97+
for x in (size..301).step_by(LANE_WIDTH) {
98+
let index = 301 * y + x;
99+
100+
let power: Vector = Simd::from_slice(&sat[index..])
101+
- Simd::from_slice(&sat[index - size..])
102+
- Simd::from_slice(&sat[index - 301 * size..])
103+
+ Simd::from_slice(&sat[index - 302 * size..]);
104+
105+
if power.simd_gt(Simd::splat(max_power)).any() {
106+
let limit = 301 - x;
107+
for (offset, power) in power.to_array().into_iter().enumerate().take(limit) {
108+
if power > max_power {
109+
max_power = power;
110+
max_x = x - size + 1 + offset;
111+
max_y = y - size + 1;
112+
}
113+
}
114+
}
115+
}
116+
}
117+
118+
Result { size, x: max_x, y: max_y, power: max_power }
85119
}

0 commit comments

Comments
 (0)