Skip to content

Commit 3bbfc71

Browse files
authored
Merge pull request #165 from georges-hatem/main
2 new implementations, to process data in parts. largerecs and smallrecs
2 parents 1bbdf96 + c108b4a commit 3bbfc71

File tree

6 files changed

+1937
-15
lines changed

6 files changed

+1937
-15
lines changed

entries/ghatem-fpc/README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88
- -t flag to specify the thread-count (default reads the thread-count available on the CPU)
99

1010
currently there are 2 versions that can be compiled / run:
11-
- `OneBRC.lpr -> ghatem `: compact record, optimal for the 1B row / 41k stations, will fail on the other tests due to overflow
12-
- `OneBRC-largerec.lpr -> ghatem-largerec `: same as OneBRC, but the StationData's "count" is UInt32 instead of 16. Passes all the tests
11+
- `OneBRC.lpr -> ghatem `: compact record, optimal for the 1B row / 41k stations, will fail on the other tests due to overflow
12+
- `OneBRC-largerec.lpr -> ghatem-largerec `: same as OneBRC, but the StationData's "count" is UInt32 instead of 16. Passes all the tests
1313

14+
- `OneBRC-parts.lpr -> ghatem-parts `: compact record, processes the data in parts in an attempt to improve stability, optimal for the 1B row / 41k stations, will fail on the other tests due to overflow
15+
- `OneBRC-parts-largerec.lpr -> ghatem-partslarge`: same as OneBRC-parts, but the StationData's "count" is UInt32 instead of UInt16. Passes all the tests
16+
1417
## Hardware + Environment
1518
host:
1619
- Dell XPS 15 (9560, 2017)

entries/ghatem-fpc/src/OneBRC-largerec.lpr

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -459,8 +459,27 @@ procedure TOneBRC.ProcessData (aThreadNb: TThreadCount; aStartIdx: Int64; aEndId
459459
vLineStart := i;
460460

461461
while i < aEndIdx do begin
462-
while FData[i] <> #10 do begin
463-
Inc (I);
462+
463+
// can still skip some chars
464+
if FData[i] > ';' then begin
465+
Inc (I, 5);
466+
end;
467+
468+
// unroll a few seems to be improving?
469+
if FData[i] <> #10 then begin
470+
Inc (i);
471+
if FData[i] <> #10 then begin
472+
Inc (i);
473+
if FData[i] <> #10 then begin
474+
Inc (I);
475+
if FData[i] <> #10 then begin
476+
Inc (i);
477+
while FData[i] <> #10 do begin
478+
Inc (I);
479+
end;
480+
end;
481+
end;
482+
end;
464483
end;
465484

466485
// new line parsed, process its contents
@@ -557,13 +576,11 @@ function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
557576
//---------------------------------------------------
558577

559578
procedure TOneBRC.GenerateOutput;
560-
var vMean: Integer;
561-
vStream: TStringStream;
562-
I, N: Int64;
579+
var vStream: TStringStream;
580+
I, N: Int32;
563581
vData: PStationData;
564582
vHash: Cardinal;
565583
vStations: TStringList;
566-
iStationName: AnsiString;
567584
vIdx: THashSize;
568585
vRes: Boolean;
569586
begin
@@ -573,10 +590,11 @@ procedure TOneBRC.GenerateOutput;
573590
vStations.UseLocale := False;
574591
try
575592
vStations.BeginUpdate;
576-
for iStationName in FDictionary.FStationNames do begin
577-
if iStationName <> '' then
578-
vStations.Add(iStationName);
593+
for i := 0 to cNumStations - 1 do begin
594+
if FDictionary.FStationNames[i] <> '' then
595+
vStations.Add (FDictionary.FStationNames[i]);
579596
end;
597+
580598
vStations.EndUpdate;
581599
vStations.CustomSort (@Compare);
582600

@@ -593,11 +611,9 @@ procedure TOneBRC.GenerateOutput;
593611
FDictionary.InternalFind (vHash, vRes, vIdx);
594612
vData := @FDictionary.FThreadData[0][FDictionary.FIndexes[vIdx]];
595613

596-
vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
597-
598614
vStream.WriteString(
599615
vStations[i] + '=' + MyFormatInt(vData^.Min)
600-
+ '/' + MyFormatInt(vMean)
616+
+ '/' + MyFormatInt(RoundExInteger(vData^.Sum/vData^.Count/10))
601617
+ '/' + MyFormatInt(vData^.Max) + ', '
602618
);
603619
Inc(I);
@@ -622,7 +638,6 @@ procedure TOneBRC.GenerateOutput;
622638
procedure TBRCThread.Execute;
623639
begin
624640
FProc (FThreadNb, FStart, FEnd);
625-
Terminate;
626641
end;
627642

628643
constructor TBRCThread.Create(aProc: TThreadProc; aThreadNb: TThreadCount; aStart: Int64; aEnd: Int64);
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<CONFIG>
3+
<ProjectOptions>
4+
<Version Value="12"/>
5+
<PathDelim Value="\"/>
6+
<General>
7+
<Flags>
8+
<MainUnitHasCreateFormStatements Value="False"/>
9+
<MainUnitHasTitleStatement Value="False"/>
10+
<MainUnitHasScaledStatement Value="False"/>
11+
<CompatibilityMode Value="True"/>
12+
</Flags>
13+
<SessionStorage Value="InProjectDir"/>
14+
<Title Value="1 BRC"/>
15+
<UseAppBundle Value="False"/>
16+
<ResourceType Value="res"/>
17+
</General>
18+
<BuildModes Count="4">
19+
<Item1 Name="Default" Default="True"/>
20+
<Item2 Name="Debug">
21+
<CompilerOptions>
22+
<Version Value="11"/>
23+
<PathDelim Value="\"/>
24+
<Target>
25+
<Filename Value="..\..\..\bin\ghatem-partslarge"/>
26+
</Target>
27+
<SearchPaths>
28+
<IncludeFiles Value="$(ProjOutDir)"/>
29+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
30+
</SearchPaths>
31+
<Parsing>
32+
<SyntaxOptions>
33+
<IncludeAssertionCode Value="True"/>
34+
</SyntaxOptions>
35+
</Parsing>
36+
<CodeGeneration>
37+
<Checks>
38+
<IOChecks Value="True"/>
39+
<RangeChecks Value="True"/>
40+
<OverflowChecks Value="True"/>
41+
<StackChecks Value="True"/>
42+
</Checks>
43+
<VerifyObjMethodCallValidity Value="True"/>
44+
</CodeGeneration>
45+
<Linking>
46+
<Debugging>
47+
<DebugInfoType Value="dsDwarf3"/>
48+
<TrashVariables Value="True"/>
49+
<UseValgrind Value="True"/>
50+
<UseExternalDbgSyms Value="True"/>
51+
</Debugging>
52+
</Linking>
53+
<Other>
54+
<CustomOptions Value="-dDEBUG"/>
55+
</Other>
56+
</CompilerOptions>
57+
</Item2>
58+
<Item3 Name="Release">
59+
<CompilerOptions>
60+
<Version Value="11"/>
61+
<PathDelim Value="\"/>
62+
<Target>
63+
<Filename Value="..\..\..\bin\ghatem-partslarge"/>
64+
</Target>
65+
<SearchPaths>
66+
<IncludeFiles Value="$(ProjOutDir)"/>
67+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
68+
</SearchPaths>
69+
<CodeGeneration>
70+
<SmartLinkUnit Value="True"/>
71+
<Optimizations>
72+
<OptimizationLevel Value="3"/>
73+
</Optimizations>
74+
</CodeGeneration>
75+
<Linking>
76+
<Debugging>
77+
<GenerateDebugInfo Value="False"/>
78+
<RunWithoutDebug Value="True"/>
79+
</Debugging>
80+
<LinkSmart Value="True"/>
81+
</Linking>
82+
<Other>
83+
<CustomOptions Value="-dRELEASE"/>
84+
</Other>
85+
</CompilerOptions>
86+
</Item3>
87+
<Item4 Name="Valgrind">
88+
<CompilerOptions>
89+
<Version Value="11"/>
90+
<PathDelim Value="\"/>
91+
<Target>
92+
<Filename Value="..\..\..\bin\ghatem-partslarge"/>
93+
</Target>
94+
<SearchPaths>
95+
<IncludeFiles Value="$(ProjOutDir)"/>
96+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
97+
</SearchPaths>
98+
<Linking>
99+
<Debugging>
100+
<DebugInfoType Value="dsDwarf3"/>
101+
<TrashVariables Value="True"/>
102+
<UseValgrind Value="True"/>
103+
<UseExternalDbgSyms Value="True"/>
104+
</Debugging>
105+
</Linking>
106+
<Other>
107+
<CustomOptions Value="-dDEBUG"/>
108+
</Other>
109+
</CompilerOptions>
110+
</Item4>
111+
</BuildModes>
112+
<PublishOptions>
113+
<Version Value="2"/>
114+
<UseFileFilters Value="True"/>
115+
</PublishOptions>
116+
<RunParams>
117+
<FormatVersion Value="2"/>
118+
</RunParams>
119+
<RequiredPackages Count="2">
120+
<Item1>
121+
<PackageName Value="mormot2"/>
122+
</Item1>
123+
<Item2>
124+
<PackageName Value="LCLBase"/>
125+
</Item2>
126+
</RequiredPackages>
127+
<Units Count="1">
128+
<Unit0>
129+
<Filename Value="OneBRC-parts-largerec.lpr"/>
130+
<IsPartOfProject Value="True"/>
131+
</Unit0>
132+
</Units>
133+
</ProjectOptions>
134+
<CompilerOptions>
135+
<Version Value="11"/>
136+
<PathDelim Value="\"/>
137+
<Target>
138+
<Filename Value="..\..\..\bin\ghatem-partslarge"/>
139+
</Target>
140+
<SearchPaths>
141+
<IncludeFiles Value="$(ProjOutDir)"/>
142+
<UnitOutputDirectory Value="..\..\..\bin\lib\$(TargetCPU)-$(TargetOS)"/>
143+
</SearchPaths>
144+
<Linking>
145+
<Debugging>
146+
<DebugInfoType Value="dsDwarf3"/>
147+
</Debugging>
148+
</Linking>
149+
<Other>
150+
<CustomOptions Value="-dDEBUG"/>
151+
</Other>
152+
</CompilerOptions>
153+
<Debugging>
154+
<Exceptions Count="3">
155+
<Item1>
156+
<Name Value="EAbort"/>
157+
</Item1>
158+
<Item2>
159+
<Name Value="ECodetoolError"/>
160+
</Item2>
161+
<Item3>
162+
<Name Value="EFOpenError"/>
163+
</Item3>
164+
</Exceptions>
165+
</Debugging>
166+
</CONFIG>

0 commit comments

Comments
 (0)