Skip to content

Commit abc6f04

Browse files
authored
Merge pull request #135 from georges-hatem/main
undo custom dict size, add option for different hash function
2 parents 61d1c72 + 6b54eac commit abc6f04

File tree

5 files changed

+49
-21
lines changed

5 files changed

+49
-21
lines changed

entries/ghatem-fpc/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,12 @@ a few performance improvements, and measurements as per gcarreno on a busy machi
233233
- using mORMot's `crc32c` function instead of the native `crc32`, time dropped to 3.8 seconds
234234
- I had removed my pre-allocated records implementation; restoring it in the custom dictionary class dropped the time to 3.2 seconds
235235
- skipping a few chars that we don't need to bother with, no timing yet
236+
237+
## v.5 (2024-04-27)
238+
239+
Tried various dictionary sizes, ranging from 45k to 95k. Even though larger dictionaries reduce collisions tremendously, a dictionary of size 45k was still optimal.
240+
241+
Also tried different hash functions, a simple modulus vs. a slightly more complex multiplicative one: the modulus is slower on my PC, but it remains to be tried on the test environment.
242+
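The multiplicative hash can be tested with the `HashMult` build mode (which defines `HASHMULT`); without that define, the simple modulus is used.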
243+
244+
Finally, it seems that choosing a dictionary size that is a prime number is also recommended: it shaves 1 second out of 20 on my PC.

entries/ghatem-fpc/src/OneBRCproj.lpi

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
<UseAppBundle Value="False"/>
1616
<ResourceType Value="res"/>
1717
</General>
18-
<BuildModes Count="4">
18+
<BuildModes Count="5">
1919
<Item1 Name="Default" Default="True"/>
2020
<Item2 Name="Debug">
2121
<CompilerOptions>
@@ -108,6 +108,35 @@
108108
</Other>
109109
</CompilerOptions>
110110
</Item4>
111+
<Item5 Name="HashMult">
112+
<CompilerOptions>
113+
<Version Value="11"/>
114+
<PathDelim Value="\"/>
115+
<Target>
116+
<Filename Value="..\..\..\bin\ghatem"/>
117+
</Target>
118+
<SearchPaths>
119+
<IncludeFiles Value="$(ProjOutDir)"/>
120+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
121+
</SearchPaths>
122+
<CodeGeneration>
123+
<SmartLinkUnit Value="True"/>
124+
<Optimizations>
125+
<OptimizationLevel Value="3"/>
126+
</Optimizations>
127+
</CodeGeneration>
128+
<Linking>
129+
<Debugging>
130+
<GenerateDebugInfo Value="False"/>
131+
<RunWithoutDebug Value="True"/>
132+
</Debugging>
133+
<LinkSmart Value="True"/>
134+
</Linking>
135+
<Other>
136+
<CustomOptions Value="-dRELEASE -dHASHMULT"/>
137+
</Other>
138+
</CompilerOptions>
139+
</Item5>
111140
</BuildModes>
112141
<PublishOptions>
113142
<Version Value="2"/>

entries/ghatem-fpc/src/OneBRCproj.lpr

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ TOneBRCApp = class(TCustomApplication)
1818
private
1919
FFileName: string;
2020
FThreadCount: Integer;
21-
FDictSize: Integer;
2221
procedure RunOneBRC;
2322
protected
2423
procedure DoRun; override;
@@ -34,7 +33,7 @@ procedure TOneBRCApp.RunOneBRC;
3433
var
3534
vOneBRC: TOneBRC;
3635
begin
37-
vOneBRC := TOneBRC.Create (FThreadCount, FDictSize);
36+
vOneBRC := TOneBRC.Create (FThreadCount);
3837
try
3938
try
4039
vOneBRC.mORMotMMF(FFileName);
@@ -89,17 +88,15 @@ procedure TOneBRCApp.DoRun;
8988
ErrorMsg: String;
9089
begin
9190
// quick check parameters
92-
ErrorMsg:= CheckOptions(Format('%s%s%s%s%s:',[
91+
ErrorMsg:= CheckOptions(Format('%s%s%s%s:',[
9392
cShortOptHelp,
9493
cShortOptThread,
95-
cShortOptDictSize,
9694
cShortOptVersion,
9795
cShortOptInput
9896
]),
9997
[
10098
cLongOptHelp,
10199
cLongOptThread+':',
102-
cLongOptDictSize+':',
103100
cLongOptVersion,
104101
cLongOptInput+':'
105102
]
@@ -129,11 +126,6 @@ procedure TOneBRCApp.DoRun;
129126
FThreadCount := StrToInt (GetOptionValue(cShortOptThread, cLongOptThread));
130127
end;
131128

132-
FDictSize := 45003;
133-
if HasOption(cShortOptDictSize, cLongOptDictSize) then begin
134-
FDictSize := StrToInt (GetOptionValue(cShortOptDictSize, cLongOptDictSize));
135-
end;
136-
137129
if HasOption(cShortOptInput, cLongOptInput) then begin
138130
FFileName := GetOptionValue(
139131
cShortOptInput,

entries/ghatem-fpc/src/baseline.console.pas

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ interface
2121
cLongOptInput = 'input-file';
2222
cShortOptThread: Char = 't';
2323
cLongOptThread = 'threads';
24-
cShortOptDictSize: Char = 's';
25-
cLongOptDictSize = 'size';
2624
{$ELSE}
2725
cOptionHelp: array of string = ['-h', '--help'];
2826
cOptionVersion: array of string = ['-v', '--version'];

entries/ghatem-fpc/src/onebrc.pas

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@ interface
1010

1111
function RoundExDouble(const ATemp: Double): Double; inline;
1212

13-
{$WRITEABLECONST ON}
1413
const
15-
cDictSize: Integer = 45003;
16-
{$WRITEABLECONST OFF}
14+
cDictSize: Integer = 45007;
1715

1816
type
1917

@@ -62,10 +60,10 @@ TOneBRC = class
6260
FThreads: array of TThread;
6361
FStationsDicts: array of TMyDictionary;
6462

65-
procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt); inline;
63+
procedure ExtractLineData(const aStart: Int64; const aEnd: Int64; out aLength: ShortInt; out aTemp: SmallInt);
6664

6765
public
68-
constructor Create (const aThreadCount: UInt16; const aDictSize: Integer);
66+
constructor Create (const aThreadCount: UInt16);
6967
destructor Destroy; override;
7068
function mORMotMMF (const afilename: string): Boolean;
7169
procedure DispatchThreads;
@@ -138,9 +136,13 @@ procedure TMyDictionary.InternalFind(const aKey: Cardinal; out aFound: Boolean;
138136
vDbl: Double;
139137
vOffset: Integer;
140138
begin
139+
{$IFDEF HASHMULT}
141140
vDbl := aKey * cHashConst;
142141
vDbl := vDbl - Trunc (vDbl);
143142
vIdx := Trunc (vDbl * cDictSize);
143+
{$ELSE}
144+
vIdx := aKey mod cDictSize;
145+
{$ENDIF}
144146

145147
aFound := False;
146148

@@ -251,11 +253,9 @@ procedure TOneBRC.ExtractLineData(const aStart: Int64; const aEnd: Int64; out aL
251253

252254
//---------------------------------------------------
253255

254-
constructor TOneBRC.Create (const aThreadCount: UInt16; const aDictSize: Integer);
256+
constructor TOneBRC.Create (const aThreadCount: UInt16);
255257
var I: UInt16;
256258
begin
257-
cDictSize := aDictSize;
258-
259259
FThreadCount := aThreadCount;
260260
SetLength (FStationsDicts, aThreadCount);
261261
SetLength (FThreads, aThreadCount);

0 commit comments

Comments
 (0)