@@ -9,17 +9,17 @@ use crate::util::parse::*;
99use crate :: util:: thread:: * ;
1010
1111pub struct Result { // Highest-power square found for one square size; one value is produced per size by `square`.
12+ size : usize , // Side length of the square.
1213 x : usize , // Column of the winning square, stored by `square` as `x - size + 1`.
1314 y : usize , // Row of the winning square, stored by `square` as `y - size + 1`.
14- size : usize ,
1515 power : i32 , // Total power of that square, computed from the summed-area table.
1616}
1717
1818pub fn parse ( input : & str ) -> Vec < Result > {
1919 let grid_serial_number: i32 = input. signed ( ) ;
2020
21- // Build Summed-area table.
22- let mut sat = vec ! [ 0 ; 301 * 301 ] ;
21+ // Build Summed-area table. Add a little extra buffer to the end for the SIMD variant.
22+ let mut sat = vec ! [ 0 ; 301 * 301 + 32 ] ;
2323
2424 for y in 1 ..301 {
2525 for x in 1 ..301 {
@@ -39,7 +39,9 @@ pub fn parse(input: &str) -> Vec<Result> {
3939 // Use as many cores as possible to parallelize the search.
4040 // Smaller sizes take more time so use work stealing to keep all cores busy.
4141 let items: Vec < _ > = ( 1 ..301 ) . collect ( ) ;
42- let result = spawn_parallel_iterator ( & items, |iter| worker ( & sat, iter) ) ;
42+ let result = spawn_parallel_iterator ( & items, |iter| {
43+ iter. map ( |& size| square ( & sat, size) ) . collect :: < Vec < _ > > ( )
44+ } ) ;
4345 result. into_iter ( ) . flatten ( ) . collect ( )
4446}
4547
@@ -49,27 +51,21 @@ pub fn part1(input: &[Result]) -> String {
4951}
5052
5153pub fn part2 ( input : & [ Result ] ) -> String { // Reports the best square across all sizes as "x,y,size".
52- let Result { x , y , size , .. } = input. iter ( ) . max_by_key ( |r| r. power ) . unwrap ( ) ;
54+ let Result { size , x , y , .. } = input. iter ( ) . max_by_key ( |r| r. power ) . unwrap ( ) ; // Picks the entry with the greatest `power`; `unwrap` panics if `input` is empty.
5355 format ! ( "{x},{y},{size}" )
5456}
5557
56- fn worker ( sat : & [ i32 ] , iter : ParIter < ' _ , usize > ) -> Vec < Result > {
57- iter. map ( |& size| {
58- let ( power, x, y) = square ( sat, size) ;
59- Result { x, y, size, power }
60- } )
61- . collect ( )
62- }
63-
6458/// Find the (x,y) coordinates and max power for a square of the specified size.
65- fn square ( sat : & [ i32 ] , size : usize ) -> ( i32 , usize , usize ) {
59+ #[ cfg( not( feature = "simd" ) ) ]
60+ fn square ( sat : & [ i32 ] , size : usize ) -> Result {
6661 let mut max_power = i32:: MIN ;
6762 let mut max_x = 0 ;
6863 let mut max_y = 0 ;
6964
7065 for y in size..301 {
7166 for x in size..301 {
7267 let index = 301 * y + x;
68+
7369 let power =
7470 sat[ index] - sat[ index - size] - sat[ index - 301 * size] + sat[ index - 302 * size] ;
7571
@@ -81,5 +77,43 @@ fn square(sat: &[i32], size: usize) -> (i32, usize, usize) {
8177 }
8278 }
8379
84- ( max_power, max_x, max_y)
80+ Result { size, x : max_x, y : max_y, power : max_power }
81+ }
82+
83+ /// Same as the scalar version but processing 16 lanes simultaneously.
84+ #[ cfg( feature = "simd" ) ] // Compiled only when the "simd" feature is enabled; the scalar `square` covers the other case.
85+ fn square ( sat : & [ i32 ] , size : usize ) -> Result { // `sat` is the 301-wide summed-area table built in `parse`; returns the best square of side `size`.
86+ use std:: simd:: cmp:: SimdPartialOrd as _;
87+ use std:: simd:: * ;
88+
89+ const LANE_WIDTH : usize = 16 ; // Number of i32 lanes per vector, i.e. consecutive x positions evaluated per step.
90+ type Vector = Simd < i32 , LANE_WIDTH > ;
91+
92+ let mut max_power = i32:: MIN ; // Start below any possible value so the first valid lane always wins.
93+ let mut max_x = 0 ;
94+ let mut max_y = 0 ;
95+
96+ for y in size..301 {
97+ for x in ( size..301 ) . step_by ( LANE_WIDTH ) { // Each step covers columns x..x + 16; the last step of a row may run past column 300.
98+ let index = 301 * y + x; // Flat index of table cell (x, y), with 301 entries per row.
99+
100+ let power: Vector = Simd :: from_slice ( & sat[ index..] ) // Same four-term summed-area lookup as the scalar version, done for 16 columns at once.
101+ - Simd :: from_slice ( & sat[ index - size..] ) // Each load reads LANE_WIDTH elements from its start; `parse` over-allocates the table by 32 entries so the final loads stay in bounds.
102+ - Simd :: from_slice ( & sat[ index - 301 * size..] )
103+ + Simd :: from_slice ( & sat[ index - 302 * size..] ) ;
104+
105+ if power. simd_gt ( Simd :: splat ( max_power) ) . any ( ) { // Cheap vector pre-check: only scan individual lanes if some lane beats the current best.
106+ let limit = 301 - x; // Lanes at column 301 or beyond were loaded from the next row or the padding, so they are excluded.
107+ for ( offset, power) in power. to_array ( ) . into_iter ( ) . enumerate ( ) . take ( limit) {
108+ if power > max_power { // Strict comparison keeps the first maximum encountered.
109+ max_power = power;
110+ max_x = x - size + 1 + offset; // `offset` is the lane's column offset from `x`.
111+ max_y = y - size + 1 ;
112+ }
113+ }
114+ }
115+ }
116+ }
117+
118+ Result { size, x : max_x, y : max_y, power : max_power }
85119}
0 commit comments