Skip to content

Commit 0bd5ef4

Browse files
68412542+EagleAglow@users.noreply.github.com68412542+EagleAglow@users.noreply.github.com
authored and committed
2 parents 516c9ac + 42d3bff commit 0bd5ef4

35 files changed

+3041
-1127
lines changed

README.md

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,19 @@ With this, we now have the official hash: `4256d19d3e134d79cc6f160d428a1d859ce96
129129

130130
There's also an archived version of the [baseline output](./data/baseline.output.gz)
131131

132+
## Small File Hashes
133+
134+
For easier comparison with the baseline, here are the hashes for different generated row counts:
135+
136+
| Lines | Input File Hash | Output File Hash |
137+
|--:|----------------:|---------:|
138+
| 1_000 | `0be4844a2417c08a85a44b26509bbe6868a6f65d0e0d087d3f9ceedc02f5ceaa` | `d42c37ca405f230e91dd0a75e6741dbbbcddd2338963ea0f0e727cf90ecbd7e7` |
139+
| 10_000 | `447380628ca25b1c9901c2e62e01591f2e2f794d2888934a5e9d4a67d72346a5` | `b4dd36d80a63fefdccbff50ffaaef7e2092034935c729b3330569c7c7f7351fc` |
140+
| 100_000 | `dd3a4821e91de82e44f17f65b1951af8a21652b92c20a7e53a1fa02ea6e5fbd2` | `c9e50d46bba327727bf4b412ec0401e0c2e59c9035b94b288e15631ca621cb52` |
141+
| 1_000_000 | `c2955973c3db29bf544655c11d2d3c7ac902c9f65014026b210bd25eb1876c0c` | `5fedbd9811660ee3423f979a0c854ec8b70da3e804170bc39bcc400c94f93bc0` |
142+
| 10_000_000 | `90193d314e991f7789258c8b6b06c493a4d624991e203b12343c2a8ce1d0c7fd` | `2f3a6383b3bc83a9ad53fc0773de2da57bd4add8a51662cdb86bfca502d276a3` |
143+
| 100_000_000 | `f55384da4646a0c77a1d5dd94a58f8430c5956fe180cedcb17b4425fe5389a39` | `7e8339b5d268fa400a93887b7a1140ac1adf683a8e837e6274fd71e383c26c6b` |
144+
132145
## Differences From Original
133146
I've decided that I would want this challenge to be turned way up to 11!
134147

@@ -154,18 +167,18 @@ These are the results from running all entries into the challenge on my personal
154167

155168
| # | Result (m:s.ms) | Compiler | Submitter | Notes | Certificates |
156169
|--:|----------------:|---------:|:----------|:------|:-------------|
157-
| 1 | 0:1.651 | lazarus-3.0, fpc-3.2.2 | Arnaud Bouchez | Using `mORMot2`, 32 threads | |
158-
| 2 | 0:2.606 | lazarus-3.99, fpc-3.3.1 | O Coddo | Using `SCL`, 32 threads | |
159-
| 3 | 0:5.884 | lazarus-3.0, fpc-3.2.2 | Georges Hatem | Free Pascal: Using 32 thread | |
160-
| 4 | 0:9.808 | lazarus-3.99, fpc-3.3.1 | G Klark | Using 32 threads | |
161-
| 5 | 0:15.059 | lazarus-3.0, fpc-3.2.2 | Székely Balázs | Using 32 threads | |
162-
| 6 | 0:53.488 | lazarus-3.0, fpc-3.2.2 | Hartmut Grosser | Using 1 thread | |
163-
| 7 | 1:15.706 | lazarus-3.0, fpc-3.2.2 | Richard Lawson | Using 1 thread | |
164-
| 8 | 2:27.407 | lazarus-3.0, fpc-3.2.2 | Iwan Kelaiah | Using 1 thread | |
165-
| 9 | 6:55.406 | delphi 12.1 | David Cornelius | Using 1 thread | |
166-
| 10 | 7:4.147 | delphi 12.1 | Brian Fire | Using 1 thread | |
167-
| 🟠 | 0:19.460 | lazarus-3.0, fpc-3.2.2 | Lurendrejer Aksen | Using 32 threads **(failed hash)** | |
168-
| 🟠 | 15:4.100 | delphi 12.1 | Daniel Töpfl | Using 1 thread **(failed hash)** | |
170+
| 1 | 0:1.260 | lazarus-3.99, fpc-3.3.1 | Arnaud Bouchez | Using `mORMot2`, 32 threads | |
171+
| 2 | 0:2.006 | lazarus-3.99, fpc-3.3.1 | O Coddo | Using `SCL`, 32 threads | |
172+
| 3 | 0:3.164 | lazarus-3.99, fpc-3.3.1 | Georges Hatem - FPC | Free Pascal: Using 32 threads | |
173+
| 4 | 0:9.652 | lazarus-3.99, fpc-3.3.1 | G Klark | Using 32 threads | |
174+
| 5 | 0:13.388 | lazarus-3.99, fpc-3.3.1 | Székely Balázs | Using 32 threads | |
175+
| 6 | 0:18.007 | lazarus-3.99, fpc-3.3.1 | Lurendrejer Aksen | Using 32 threads | |
176+
| 7 | 0:52.486 | lazarus-3.99, fpc-3.3.1 | Hartmut Grosser | Using 1 thread | |
177+
| 8 | 1:10.017 | lazarus-3.99, fpc-3.3.1 | Richard Lawson | Using 1 thread | |
178+
| 9 | 2:36.787 | lazarus-3.0, fpc-3.2.2 | Iwan Kelaiah | Using 1 thread | |
179+
| 10 | 6:49.745 | delphi 12.1 | David Cornelius | Using 1 thread | |
180+
| 11 | 8:34.438 | delphi 12.1 | Daniel Töpfl | Using 1 thread | |
181+
| 12 | 16:6.862 | delphi 12.1 | Brian Fire | Using 1 thread | |
169182

170183

171184
> **NOTE**

entries/abouchez/README.md

Lines changed: 201 additions & 65 deletions
Large diffs are not rendered by default.

entries/abouchez/src/brcmormot.lpr

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ constructor TBrcThread.Create(owner: TBrcMain);
254254

255255
const
256256
HASHSIZE = 1 shl 18; // slightly oversized to avoid most collisions
257+
// we tried with a prime constant for fast modulo mult-by-reciprocal: slower
257258

258259
constructor TBrcMain.Create(const fn: TFileName; threads, chunkmb, max: integer;
259260
affinity, fullsearch: boolean);
@@ -561,28 +562,25 @@ function TBrcMain.SortedText: RawUtf8;
561562
begin
562563
assert(SizeOf(TBrcStation) <= 64 div 4); // 64 = CPU L1 cache line size
563564
// read command line parameters
564-
Executable.Command.ExeDescription := 'The mORMot One Billion Row Challenge';
565-
if Executable.Command.Arg(0, 'the data source #filename') then
566-
Utf8ToFileName(Executable.Command.Args[0], fn{%H-});
567-
verbose := Executable.Command.Option(
568-
['v', 'verbose'], 'generate verbose output with timing');
569-
affinity := Executable.Command.Option(
570-
['a', 'affinity'], 'force thread affinity to a single CPU core');
571-
full := Executable.Command.Option(
572-
['f', 'full'], 'force full name lookup (disable "perfect hash" trick)');
573-
Executable.Command.Get(
574-
['t', 'threads'], threads, '#number of threads to run',
575-
SystemInfo.dwNumberOfProcessors);
576-
Executable.Command.Get(
577-
['c', 'chunk'], chunkmb, 'size in #megabytes used for per-thread chunking', 16);
578-
help := Executable.Command.Option(['h', 'help'], 'display this help');
579-
if Executable.Command.ConsoleWriteUnknown then
580-
exit
581-
else if help or
582-
(fn = '') then
565+
with Executable.Command do
583566
begin
584-
ConsoleWrite(Executable.Command.FullDescription);
585-
exit;
567+
ExeDescription := 'The mORMot One Billion Row Challenge';
568+
if Arg(0, 'the data source #filename') then
569+
Utf8ToFileName(Executable.Command.Args[0], fn{%H-});
570+
verbose := Option(['v', 'verbose'], 'generate verbose output with timing');
571+
affinity := Option(['a', 'affinity'], 'force thread affinity to a single CPU core');
572+
full := Option(['f', 'full'], 'force full name lookup (disable "perfect hash" trick)');
573+
Get(['t', 'threads'], threads, '#number of thread workers', SystemInfo.dwNumberOfProcessors);
574+
Get(['c', 'chunk'], chunkmb, 'size in #megabytes for per-thread chunking', 16);
575+
help := Option(['h', 'help'], 'display this help');
576+
if ConsoleWriteUnknown then
577+
exit
578+
else if help or
579+
(fn = '') then
580+
begin
581+
ConsoleWrite(FullDescription);
582+
exit;
583+
end;
586584
end;
587585
// actual process
588586
if verbose then

entries/ghatem-fpc/README.md

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
## Usage
88
- -t flag to specify the thread-count (default reads the thread-count available on the CPU)
99

10-
currently there are 3 configurations that can be compiled / run:
11-
- `HASHMOD`: uses modulus for hashing, least collisions
12-
- `HASHMULT`: alternative hashing, more collisions, faster on my PC, but seemingly slower on test PCs
13-
- `LEMIRE`: faster hash function calculation, most collisions it seems, yet the fastest on my PC
10+
currently there are 3 versions that can be compiled / run:
11+
- `OneBRC.lpr -> ghatem `: all threads share the station names - involves locking
12+
- `OneBRC-nosharedname.lpr -> ghatem-nosharedname`: each thread maintains a copy of the station names - no locking involved
13+
- `OneBRC-smallrec.lpr -> ghatem-smallrec `: same as OneBRC, but the StationData's "count" is a 16-bit UInt16 instead of a 32-bit integer. Will likely fail to match the hash on the 5B rows test
1414

1515
## Hardware + Environment
1616
host:
@@ -247,3 +247,31 @@ Another trial with various hash functions, a simple modulus vs. a slightly more
247247
Can be tested with the HASHMULT build option
248248

249249
Finally, it seems choosing a dictionary size that is a prime number is also recommended: shaves 1 second out of 20 on my PC.
250+
251+
## v.6 (2024-05-04)
252+
253+
As of the latest results executed by Paweld, there are two main bottlenecks throttling the entire implementation, according to CallGrind and KCacheGrind:
254+
- function ExtractLineData, 23% of total cost, of which 9% is due to `fpc_stackcheck`
255+
- the hash lookup function, at 40% of total cost
256+
257+
Currently, the hash lookup is done on an array of records. Increasing the array size causes slowness, and reducing it causes further collisions.
258+
Will try to see how to reduce collisions (increase array size), all while minimizing the cost of cache misses.
259+
260+
Edit:
261+
The goal is to both:
262+
- minimize collisions on the hashes (keys) by having a good hash function, but also increase the size of the keys storage
263+
- minimize the size of the array of packed records
264+
265+
The idea:
266+
- the dictionary will no longer point to a PStationData pointer, but rather to an index between 0 and StationCount, where the record is stored in the array.
267+
- -> data about the same station will be stored at the same index for all threads' data-arrays
268+
- -> names will also be stored at that same index upon first encounter, and are common to all threads
269+
- no locking needs to occur when the key is already found, since there is no multiple-write occurring
270+
- the data-arrays are pre-allocated, and an atomic counter will be incremented to know where the next element will be stored.
271+
272+
Thinking again, this is likely similar to the approach mentioned by @synopse in one of his comments.
273+
274+
For the ExtractLineData, three ideas to try implementing:
275+
- avoid using a function, to get rid of the cost of stack checking
276+
- reduce branching, I think it should be possible to go from 3 if-statements, to only 1
277+
- unroll the loop (although I had tried this in the past, did not show any improvements)
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<CONFIG>
3+
<ProjectOptions>
4+
<Version Value="12"/>
5+
<PathDelim Value="\"/>
6+
<General>
7+
<Flags>
8+
<MainUnitHasCreateFormStatements Value="False"/>
9+
<MainUnitHasTitleStatement Value="False"/>
10+
<MainUnitHasScaledStatement Value="False"/>
11+
<CompatibilityMode Value="True"/>
12+
</Flags>
13+
<SessionStorage Value="InProjectDir"/>
14+
<Title Value="1 BRC"/>
15+
<UseAppBundle Value="False"/>
16+
<ResourceType Value="res"/>
17+
</General>
18+
<BuildModes Count="4">
19+
<Item1 Name="Default" Default="True"/>
20+
<Item2 Name="Debug">
21+
<CompilerOptions>
22+
<Version Value="11"/>
23+
<PathDelim Value="\"/>
24+
<Target>
25+
<Filename Value="..\..\..\bin\ghatem-largerec"/>
26+
</Target>
27+
<SearchPaths>
28+
<IncludeFiles Value="$(ProjOutDir)"/>
29+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
30+
</SearchPaths>
31+
<Parsing>
32+
<SyntaxOptions>
33+
<IncludeAssertionCode Value="True"/>
34+
</SyntaxOptions>
35+
</Parsing>
36+
<CodeGeneration>
37+
<Checks>
38+
<IOChecks Value="True"/>
39+
<RangeChecks Value="True"/>
40+
<OverflowChecks Value="True"/>
41+
<StackChecks Value="True"/>
42+
</Checks>
43+
<VerifyObjMethodCallValidity Value="True"/>
44+
</CodeGeneration>
45+
<Linking>
46+
<Debugging>
47+
<DebugInfoType Value="dsDwarf3"/>
48+
<TrashVariables Value="True"/>
49+
<UseValgrind Value="True"/>
50+
<UseExternalDbgSyms Value="True"/>
51+
</Debugging>
52+
</Linking>
53+
<Other>
54+
<CustomOptions Value="-dDEBUG"/>
55+
</Other>
56+
</CompilerOptions>
57+
</Item2>
58+
<Item3 Name="Release">
59+
<CompilerOptions>
60+
<Version Value="11"/>
61+
<PathDelim Value="\"/>
62+
<Target>
63+
<Filename Value="..\..\..\bin\ghatem-largerec"/>
64+
</Target>
65+
<SearchPaths>
66+
<IncludeFiles Value="$(ProjOutDir)"/>
67+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
68+
</SearchPaths>
69+
<CodeGeneration>
70+
<SmartLinkUnit Value="True"/>
71+
<Optimizations>
72+
<OptimizationLevel Value="3"/>
73+
</Optimizations>
74+
</CodeGeneration>
75+
<Linking>
76+
<Debugging>
77+
<GenerateDebugInfo Value="False"/>
78+
<RunWithoutDebug Value="True"/>
79+
</Debugging>
80+
<LinkSmart Value="True"/>
81+
</Linking>
82+
<Other>
83+
<CustomOptions Value="-dRELEASE"/>
84+
</Other>
85+
</CompilerOptions>
86+
</Item3>
87+
<Item4 Name="Valgrind">
88+
<CompilerOptions>
89+
<Version Value="11"/>
90+
<PathDelim Value="\"/>
91+
<Target>
92+
<Filename Value="..\..\..\bin\ghatem-largerec"/>
93+
</Target>
94+
<SearchPaths>
95+
<IncludeFiles Value="$(ProjOutDir)"/>
96+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
97+
</SearchPaths>
98+
<Linking>
99+
<Debugging>
100+
<DebugInfoType Value="dsDwarf3"/>
101+
<TrashVariables Value="True"/>
102+
<UseValgrind Value="True"/>
103+
<UseExternalDbgSyms Value="True"/>
104+
</Debugging>
105+
</Linking>
106+
<Other>
107+
<CustomOptions Value="-dDEBUG"/>
108+
</Other>
109+
</CompilerOptions>
110+
</Item4>
111+
</BuildModes>
112+
<PublishOptions>
113+
<Version Value="2"/>
114+
<UseFileFilters Value="True"/>
115+
</PublishOptions>
116+
<RunParams>
117+
<FormatVersion Value="2"/>
118+
</RunParams>
119+
<RequiredPackages Count="2">
120+
<Item1>
121+
<PackageName Value="mormot2"/>
122+
</Item1>
123+
<Item2>
124+
<PackageName Value="LCLBase"/>
125+
</Item2>
126+
</RequiredPackages>
127+
<Units Count="1">
128+
<Unit0>
129+
<Filename Value="OneBRC-largerec.lpr"/>
130+
<IsPartOfProject Value="True"/>
131+
</Unit0>
132+
</Units>
133+
</ProjectOptions>
134+
<CompilerOptions>
135+
<Version Value="11"/>
136+
<PathDelim Value="\"/>
137+
<Target>
138+
<Filename Value="..\..\..\bin\ghatem-largerec"/>
139+
</Target>
140+
<SearchPaths>
141+
<IncludeFiles Value="$(ProjOutDir)"/>
142+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
143+
</SearchPaths>
144+
<Linking>
145+
<Debugging>
146+
<DebugInfoType Value="dsDwarf3"/>
147+
</Debugging>
148+
</Linking>
149+
<Other>
150+
<CustomOptions Value="-dDEBUG"/>
151+
</Other>
152+
</CompilerOptions>
153+
<Debugging>
154+
<Exceptions Count="3">
155+
<Item1>
156+
<Name Value="EAbort"/>
157+
</Item1>
158+
<Item2>
159+
<Name Value="ECodetoolError"/>
160+
</Item2>
161+
<Item3>
162+
<Name Value="EFOpenError"/>
163+
</Item3>
164+
</Exceptions>
165+
</Debugging>
166+
</CONFIG>

0 commit comments

Comments
 (0)