Skip to content

Commit a0ff212

Browse files
authored
Merge pull request #101 from georges-hatem/main
ghatem: first multi-threaded attempt
2 parents 094e830 + 0b167d9 commit a0ff212

File tree

3 files changed

+281
-81
lines changed

3 files changed

+281
-81
lines changed

entries/ghatem-fpc/README.md

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,4 +132,54 @@ Instead of extracting the station name as a string 1B times, and use it as a dic
132132
This requires us to migrate from TFPHashList to the generic TDictionary. Even though TDictionary is slower than TFPHashList, the overall improvements yielded a significant performance gain.
133133
Using TDictionary, the code is more similar to my Delphi version, in case we get to run those tests on a Windows PC.
134134

135-
** expected timing: ~60 seconds, single-threaded**
135+
*expected timing: ~60 seconds, single-threaded*
136+
**ACTUAL TIMING: 58 seconds as per gcarreno**
137+
138+
139+
## Multi-Threaded Attempt (2024-04-10)
140+
141+
In a first attempt, using 2 threads, I evaluated how harmful my "shared-memory" variables were, namely:
142+
- the pre-allocated records array
143+
- its counter
144+
- the dictionary of aggregated data
145+
- the stringlist of station names
146+
147+
Using a Critical-Section, even on just the stringlist, the performance drops quite a bit.
148+
149+
### minimize shared-memory
150+
151+
I got rid of the pre-allocated records array and its counter, going back to on-the-fly allocation of records.
152+
Got rid of the station names stringlist, because merging those lists at the end was very slow: instead, store the station name directly in the record.
153+
Replaced the singular dictionary with an array of dictionaries, 1-per-thread.
154+
155+
### redundant work
156+
157+
All of this causes some redundant work to be done by the various threads:
158+
- one record is allocated per-station-per-thread
159+
- since we store the station name in the record, we convert it to string (using `SetString`) once per-station-per-thread
160+
161+
### parallelism
162+
163+
Given N bytes and K threads, a basic attempt is to distribute a range `N / K` of data per thread.
164+
A thread may get its start/end boundaries in the middle of a line: we ensure that each line is processed exactly once.
165+
166+
Again, just to evaluate how things will go, we wait for all threads to complete before proceeding.
167+
168+
### merging
169+
170+
Given 2 dictionaries L and R, we merge them into L. This merge is applied K-1 times, and dictionaries[0] will contain the aggregated result.
171+
We could later consider merging every 2 threads that have completed their work.
172+
173+
### expectations
174+
175+
Parallelization on my environment (see above) performed quite poorly, considering there is no shared memory to protect against concurrent access:
176+
4 threads barely achieved a 60% improvement over 1 thread.
177+
178+
Is it due to:
179+
- VM virtualization?
180+
- too many dictionaries causing too many cache misses?
181+
- the workload being distributed unevenly?
182+
183+
Better to wait and see the results on the real environment before judging.
184+
185+

entries/ghatem-fpc/src/OneBRCproj.lpr

Lines changed: 60 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
TOneBRCApp = class(TCustomApplication)
1818
private
19+
FFileName: string;
20+
procedure RunOneBRC;
1921
protected
2022
procedure DoRun; override;
2123
public
@@ -26,13 +28,65 @@ TOneBRCApp = class(TCustomApplication)
2628

2729
{ TOneBRCApp }
2830

29-
procedure TOneBRCApp.DoRun;
31+
procedure TOneBRCApp.RunOneBRC;
3032
var
31-
ErrorMsg: String;
3233
vOneBRC: TOneBRC;
33-
vFileName: string;
3434
vStart: Int64;
3535
vTime: Int64;
36+
begin
37+
vOneBRC := TOneBRC.Create (32);
38+
try
39+
try
40+
vOneBRC.mORMotMMF(FFileName);
41+
vOneBRC.DispatchThreads;
42+
vOneBRC.WaitAll;
43+
vOneBRC.MergeAll;
44+
vOneBRC.GenerateOutput;
45+
46+
//vStart := GetTickCount;
47+
//vOneBRC.mORMotMMF (FFileName);
48+
//vTime := GetTickCount - vStart;
49+
//WriteLn('read: ' + FloatToStr(vTime / 1000));
50+
//WriteLn('-----------');
51+
//WriteLn;
52+
//
53+
//vStart := GetTickCount;
54+
//vOneBRC.DispatchThreads;
55+
//vOneBRC.WaitAll;
56+
//vTime := GetTickCount - vStart;
57+
//WriteLn('process: ' + FloatToStr(vTime / 1000));
58+
//WriteLn('-----------');
59+
//WriteLn;
60+
//
61+
//vStart := GetTickCount;
62+
//vOneBRC.MergeAll;
63+
//vTime := GetTickCount - vStart;
64+
//WriteLn('merge: ' + FloatToStr(vTime / 1000));
65+
//WriteLn('-----------');
66+
//WriteLn;
67+
//
68+
//vStart := GetTickCount;
69+
//vOneBRC.GenerateOutput;
70+
//vTime := GetTickCount - vStart;
71+
//WriteLn('generate: ' + FloatToStr(vTime / 1000));
72+
//WriteLn('-----------');
73+
//WriteLn;
74+
//ReadLn;
75+
except
76+
on E: Exception do
77+
begin
78+
WriteLn(Format(rsErrorMessage, [ E.Message ]));
79+
ReadLn;
80+
end;
81+
end;
82+
finally
83+
vOneBRC.Free;
84+
end;
85+
end;
86+
87+
procedure TOneBRCApp.DoRun;
88+
var
89+
ErrorMsg: String;
3690
begin
3791
// quick check parameters
3892
ErrorMsg:= CheckOptions(Format('%s%s%s:',[
@@ -67,7 +121,7 @@ procedure TOneBRCApp.DoRun;
67121
end;
68122

69123
if HasOption(cShortOptInput, cLongOptInput) then begin
70-
vFileName := GetOptionValue(
124+
FFileName := GetOptionValue(
71125
cShortOptInput,
72126
cLongOptInput
73127
);
@@ -78,45 +132,9 @@ procedure TOneBRCApp.DoRun;
78132
Exit;
79133
end;
80134

81-
vFileName := ExpandFileName(vFileName);
135+
FFileName := ExpandFileName(FFileName);
82136

83-
vOneBRC := TOneBRC.Create;
84-
try
85-
try
86-
vOneBRC.mORMotMMF(vFileName);
87-
vOneBRC.SingleThread;
88-
vOneBRC.GenerateOutput;
89-
90-
{vStart := GetTickCount;
91-
vOneBRC.mORMotMMF (vFileName);
92-
vTime := GetTickCount - vStart;
93-
WriteLn('read: ' + FloatToStr(vTime / 1000));
94-
WriteLn('-----------');
95-
WriteLn;
96-
97-
vStart := GetTickCount;
98-
vOneBRC.SingleThread;
99-
vTime := GetTickCount - vStart;
100-
WriteLn('process: ' + FloatToStr(vTime / 1000));
101-
WriteLn('-----------');
102-
WriteLn;
103-
104-
vStart := GetTickCount;
105-
vOneBRC.GenerateOutput;
106-
vTime := GetTickCount - vStart;
107-
WriteLn('generate: ' + FloatToStr(vTime / 1000));
108-
WriteLn('-----------');
109-
WriteLn;
110-
ReadLn; }
111-
except
112-
on E: Exception do
113-
begin
114-
WriteLn(Format(rsErrorMessage, [ E.Message ]));
115-
end;
116-
end;
117-
finally
118-
vOneBRC.Free;
119-
end;
137+
RunOneBRC;
120138

121139
// stop program loop
122140
Terminate;

0 commit comments

Comments
 (0)