Skip to content

Commit f493b84

Browse files
68412542+EagleAglow@users.noreply.github.com68412542+EagleAglow@users.noreply.github.com
authored andcommitted
2 parents abbd79b + 58bf2bb commit f493b84

File tree

12 files changed

+138
-108
lines changed

12 files changed

+138
-108
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Grab all your threads, reach out to SIMD, or pull any other trick, and create t
1717
<img src="img/1brc.png" alt="1BRC" style="display: block; margin-left: auto; margin-right: auto; margin-bottom:1em; width: 50%;">
1818
</p>
1919

20-
The text file contains temperature values for a range of weather stations. Each row is one measurement in the format `<string: station name>;<double: measurement>`, with the measurement value having exactly one fractional digit.
20+
The text file contains temperature values for a range of weather stations. Each row is one measurement in the format `<string: station name>;<double: measurement>`, with the measurement value having exactly one fractional digit. Rows are separated by a single line feed, LF (ASCII 10), for consistency with the original challenge, and no longer by CR+LF (ASCII 13+10).
2121
The following shows ten rows as an example:
2222

2323
```
@@ -119,7 +119,7 @@ C:> CertUtil -hashfile .\data\measurements.txt SHA256
119119
Get-FileHash .\data\measurements.txt -Algorithm SHA256
120120
```
121121
Expected `SHA256` hash:
122-
`ebad17b266ee9f5cb3d118531f197e6f68c9ab988abc5cb9506e6257e1a52ce6`
122+
`2b48bc2fa0b82d748925a820f43f75df01cc06df7447c7571e52d3962e675960`
123123

124124
## Verify Output File
125125

entries/abouchez/src/brcmormot.lpr

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ procedure ParseLine(var chunk: TBrcChunk); nostackframe; assembler;
111111
setne dl
112112
mov eax, dword ptr [r8 + 1] // eax = xx.x or x.x
113113
shl cl, 3
114-
lea r8, [r8 + rdx + 6] // r8 = next line
114+
lea r8, [r8 + rdx + 5] // r8 = next line (LF only)
115115
shl eax, cl // normalized as xx.x
116116
and eax, $0f000f0f // from ascii to digit
117117
imul rax, rax, 1 + 10 shl 16 + 100 shl 24
@@ -156,14 +156,14 @@ function CompareMem(a, b: PAnsiChar; l: PtrInt): boolean; nostackframe; assemble
156156
cmp rax, rdx
157157
jl @by1
158158
align 8
159-
@by8: mov rcx, qword ptr [rdi + rdx] // branchless for l>=8
159+
@by8: mov rcx, qword ptr [rdi + rdx] // branchless for 8..16 bytes
160160
cmp rcx, qword ptr [rsi + rdx]
161161
jne @set
162162
sub rdx, rax
163163
jz @ok
164164
cmp rax, rdx
165-
jge @by8
166-
mov rcx, qword ptr [rdi + rax] // may overlap
165+
jg @by8
166+
mov rcx, qword ptr [rdi + rax] // compare last 8 bytes - may overlap
167167
cmp rcx, qword ptr [rsi + rax]
168168
@set: sete al
169169
ret
@@ -194,7 +194,7 @@ procedure ParseLine(var chunk: TBrcChunk); inline;
194194
// branchless parsing of the temperature
195195
neg := ord(p[1] <> '-') * 2 - 1; // neg = +1 or -1
196196
inc(p, ord(p[1] = '-')); // ignore '-' sign
197-
chunk.Start := @p[ord(p[2] <> '.') + 6]; // next line
197+
chunk.Start := @p[ord(p[2] <> '.') + 5]; // next line (LF only)
198198
chunk.Value := PtrInt(cardinal((QWord((PCardinal(p + 1)^ shl
199199
(byte(ord(p[2] = '.') shl 3))) and $0f000f0f) *
200200
(1 + 10 shl 16 + 100 shl 24)) shr 24) and cardinal(1023)) * neg;
@@ -216,7 +216,7 @@ function CompareMem(a, b: PAnsiChar; l: PtrInt): boolean;
216216
dec(l, ptrsiz);
217217
if l = 0 then
218218
exit
219-
else if l <= ptrsiz then
219+
else if l < ptrsiz then
220220
continue;
221221
result := PPtrUInt(@a[ptrsiz])^ = PPtrUInt(@b[ptrsiz])^; // may overlap
222222
exit;
@@ -246,7 +246,7 @@ constructor TBrcThread.Create(owner: TBrcMain);
246246
FreeOnTerminate := true;
247247
SetLength(fStation, fOwner.fMax);
248248
InterlockedIncrement(fOwner.fRunning);
249-
inherited Create({suspended=}false);
249+
inherited Create({suspended=}false, {stacksize=}16384);
250250
end;
251251

252252

@@ -274,7 +274,7 @@ constructor TBrcMain.Create(const fn: TFileName; threads, chunkmb, max: integer;
274274
if not fullsearch then
275275
SetLength(fNameHash, fMax);
276276
SetLength(fNameLine, fMax);
277-
// we tried pre-loading a first chunk here but it was not faster
277+
// (we tried pre-loading a first chunk here but it was not faster)
278278
// run the thread workers
279279
core := 0;
280280
cores := SystemInfo.dwNumberOfProcessors;

entries/abouchez/src/brcmormotfullcheck.lpr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ procedure ParseLine(var chunk: TBrcChunk); nostackframe; assembler;
117117
setne dl
118118
mov eax, dword ptr [r8 + 1] // eax = xx.x or x.x
119119
shl cl, 3
120-
lea r8, [r8 + rdx + 6] // r8 = next line
120+
lea r8, [r8 + rdx + 5] // r8 = next line
121121
shl eax, cl // normalized as xx.x
122122
and eax, $0f000f0f // from ascii to digit
123123
imul rax, rax, 1 + 10 shl 16 + 100 shl 24
@@ -208,7 +208,7 @@ procedure ParseLine(var chunk: TBrcChunk); inline;
208208
// branchless parsing of the temperature
209209
neg := ord(p[1] <> '-') * 2 - 1; // neg = +1 or -1
210210
inc(p, ord(p[1] = '-')); // ignore '-' sign
211-
chunk.Start := @p[ord(p[2] <> '.') + 6]; // next line
211+
chunk.Start := @p[ord(p[2] <> '.') + 5]; // next line
212212
chunk.Value := PtrInt(cardinal((QWord((PCardinal(p + 1)^ shl
213213
(byte(ord(p[2] = '.') shl 3))) and $0f000f0f) *
214214
(1 + 10 shl 16 + 100 shl 24)) shr 24) and cardinal(1023)) * neg;

entries/abouchez/src/brcmormotold.lpr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,12 +306,12 @@ procedure TBrcThread.Execute;
306306
begin
307307
// note: the PCardinal(p)^ + "shr and $ff" trick is actually slower
308308
v := (p[0] * 100 + p[1] * 10 + p[3] - (ord('0') * 111)) * neg;
309-
p := @p[6]; // also jump ending $13/$10
309+
p := @p[5]; // also jump ending $10
310310
end
311311
else
312312
begin
313313
v := (p[0] * 10 + p[2] - (ord('0') * 11)) * neg; // x.x
314-
p := @p[5];
314+
p := @p[4];
315315
end;
316316
// store the value
317317
{$ifdef CUSTOMHASH}

entries/abouchez/src/brcmormotpertheadht.lpr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ procedure ParseLine(var chunk: TBrcChunk); nostackframe; assembler;
112112
setne dl
113113
mov eax, dword ptr [r8 + 1] // eax = xx.x or x.x
114114
shl cl, 3
115-
lea r8, [r8 + rdx + 6] // r8 = next line
115+
lea r8, [r8 + rdx + 5] // r8 = next line
116116
shl eax, cl // normalized as xx.x
117117
and eax, $0f000f0f // from ascii to digit
118118
imul rax, rax, 1 + 10 shl 16 + 100 shl 24
@@ -164,7 +164,7 @@ procedure ParseLine(var chunk: TBrcChunk); inline;
164164
// branchless parsing of the temperature
165165
neg := ord(p[1] <> '-') * 2 - 1; // neg = +1 or -1
166166
inc(p, ord(p[1] = '-')); // ignore '-' sign
167-
chunk.Start := @p[ord(p[2] <> '.') + 6]; // next line
167+
chunk.Start := @p[ord(p[2] <> '.') + 5]; // next line
168168
chunk.Value := PtrInt(cardinal((QWord((PCardinal(p + 1)^ shl
169169
(byte(ord(p[2] = '.') shl 3))) and $0f000f0f) *
170170
(1 + 10 shl 16 + 100 shl 24)) shr 24) and cardinal(1023)) * neg;

entries/abouchez/src/brcmormotsharedht.lpr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,12 +411,12 @@ procedure TBrcThread.Execute;
411411
begin
412412
// note: the PCardinal(p)^ + "shr and $ff" trick is actually slower
413413
v := (p[0] * 100 + p[1] * 10 + p[3] - (ord('0') * 111)) * neg;
414-
p := @p[6]; // also jump ending $13/$10
414+
p := @p[5]; // also jump ending $10
415415
end
416416
else
417417
begin
418418
v := (p[0] * 10 + p[2] - (ord('0') * 11)) * neg; // x.x
419-
p := @p[5];
419+
p := @p[4];
420420
end;
421421
// store the value
422422
if s^.Count = 0 then

entries/ghatem-fpc/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,3 +224,12 @@ I implemented my own Dictionary class consisting of two arrays. We compute the m
224224

225225
edit:
226226
quadratic probing improved performance even further. we could probably do better with 2-level hashing, but finding such a hash function is going to take a lot of trials, this is probably acceptable results
227+
228+
**ACTUAL TIMING (busy machine): ~4 seconds as per gcarreno**
229+
230+
## v.4 (2024-04-24)
231+
232+
a few performance improvements, and measurements as per gcarreno on a busy machine:
233+
- using mORMot's `crc32c` function instead of the native `crc32`, time dropped to 3.8 seconds
234+
- I had removed my pre-allocated records implementation. restored it in the custom dictionary class, time dropped to 3.2 seconds
235+
- skipping a few chars that we don't need to bother with, no timing yet

entries/ghatem-fpc/src/OneBRCproj.lpi

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
<Linking>
4646
<Debugging>
4747
<DebugInfoType Value="dsDwarf3"/>
48-
<UseHeaptrc Value="True"/>
4948
<TrashVariables Value="True"/>
5049
<UseValgrind Value="True"/>
5150
<UseExternalDbgSyms Value="True"/>

entries/ghatem-fpc/src/onebrc.pas

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int
318318
// the given ending point might be in the middle of a line:
319319
// find the beginning of that line (the last block works well)
320320
while True do begin
321-
if FData[aEndIdx] <> #13 then
321+
if FData[aEndIdx] <> #10 then
322322
Dec (aEndIdx)
323323
else
324324
break;
@@ -328,7 +328,7 @@ procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int
328328
vLineStart := i;
329329

330330
while i < aEndIdx do begin
331-
if FData[i] = #13 then begin
331+
if FData[i] = #10 then begin
332332
// new line parsed, process its contents
333333
ExtractLineData (vLineStart, i - 1, vLenStationName, vTemp);
334334

@@ -360,8 +360,18 @@ procedure TOneBRC.ProcessData (aThreadNb: UInt16; aStartIdx: Int64; aEndIdx: Int
360360
FStationsDicts[aThreadNb].Add (vHash, vData);
361361
end;
362362

363-
// next char is #10, so we can skip 2 instead of 1
364-
vLineStart := i+2;
363+
// we're at a #10: next line starts at the next index
364+
vLineStart := i+1;
365+
366+
// we're at a #10:
367+
// until the next #10 char, there will be:
368+
// - 1 semicolon
369+
// - 3 chars for the temp (min)
370+
// - 2 chars for the name (min)
371+
// - the usual Inc (I)
372+
// so we should be able to skip 7 chars until another #10 may appear
373+
Inc (i, 7);
374+
continue;
365375
end;
366376

367377
Inc (i);

entries/hgrosser/README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,26 @@
33
**1 billion row Challenge entry**
44

55
## Version
6-
Version 1.51
6+
Version 1.60
77

88
## How to compile
99
The program was developed with FPC 3.2.2 and Lazarus 2.2.4
1010

11+
There is a new conditional "noCR", which is necessary if the input file has no CRs
12+
1113
## How to start
1214
```
13-
Usage: <path to input file> <bit-width for hash-list (14..28)>
14-
Example: hgrosser measurements.txt 15
15+
Usage: <path to input file> <bit-width for hash-list (16..28)>
16+
Example: hgrosser measurements.txt 16
1517
- bit-width for hash-list: sets the size of the hash list, e.g. '16' => 65536 entries
1618
```
17-
There are no switches like `-i` etc, only values.
19+
There are no switches like `-i` etc, only 2 values.
1820

1921
### Optimizing the 2nd command line parameter
2022

21-
In theory the program should run faster with greater bit-widths for the hash-list (because of less collisions), but on my own computer (8 GB RAM) in praxis a small value of 15 is the fastest way, allthough this causes many collisions.
23+
In theory the program should run faster with greater bit-widths for the hash-list (because of fewer collisions), but on my own computer (8 GB RAM) in practice a small value of 16 is the fastest, although this causes many collisions.
2224

23-
Please (if possible) try all values from 14 to 24 (maybe in a for-loop). Thanks a lot.
25+
Please (if possible) try all values from 16 to 22 (maybe in a for-loop). Thanks a lot.
2426

2527
## How the program works
2628
The Program works with 1 thread.
@@ -37,3 +39,4 @@ To speed things up:
3739
- Version 1.00: initial version
3840
- Version 1.50: hash-list optimized, small improvements in parsing the file
3941
- Version 1.51: small improvements in asm function
42+
- Version 1.60: hash-list optimized, some minor improvements, Conditional "noCR" added

0 commit comments

Comments
 (0)