Skip to content

Commit b6b4be6

Browse files
authored
Merge pull request #151 from georges-hatem/main
replace FormatFloat with own implementation
2 parents b2e58aa + 47c9b93 commit b6b4be6

File tree

4 files changed

+97
-30
lines changed

4 files changed

+97
-30
lines changed

entries/ghatem-fpc/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,31 @@ Another trial with various hash functions, a simple modulus vs. a slightly more
247247
Can be tested with the HASHMULT build option
248248

249249
Finally, it seems choosing a dictionary size that is a prime number is also recommended: shaves 1 second out of 20 on my PC.
250+
251+
## v.6 (2024-05-04)
252+
253+
As of the latest results executed by Paweld, there are two main bottlenecks throttling the entire implementation, according to CallGrind and KCacheGrind:
254+
- function ExtractLineData, 23% of total cost, of which 9% is due to `fpc_stackcheck`
255+
- the hash lookup function, at 40% of total cost
256+
257+
Currently, the hash lookup is done on an array of records. Increasing the array size causes slowness, and reducing it causes further collisions.
258+
Will try to see how to reduce collisions (increase array size), all while minimizing the cost of cache misses.
259+
260+
Edit:
261+
The goal is to both:
262+
- minimize collisions on the hashes (keys) by having a good hash function, but also increase the size of the keys storage
263+
- minimize the size of the array of packed records
264+
265+
The idea:
266+
- the dictionary will no longer point to a PStationData pointer, but rather to an index between 0 and StationCount, where the record is stored in the array.
267+
- -> data about the same station will be stored at the same index for all threads' data-arrays
268+
- -> names will also be stored at that same index upon first encounter, and is common to all threads
269+
- no locking needs to occur when the key is already found, since there is no multiple-write occurring
270+
- the data-arrays are pre-allocated, and a atomic-counter will be incremented to know where the next element will be stored.
271+
272+
Thinking again, this is likely similar to the approach mentioned by @synopse in one of his comments.
273+
274+
For the ExtractLineData, three ideas to try implementing:
275+
- avoid using a function, to get rid of the cost of stack checking
276+
- reduce branching, I think it should be possible to go from 3 if-statements, to only 1
277+
- unroll the loop (although I had tried this in the past, did not show any improvements)

entries/ghatem-fpc/src/OneBRC-nosharedname.lpr

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -151,12 +151,12 @@ function Ceiling (const ANumber: Double): Integer; inline;
151151
Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0);
152152
end;
153153

154-
function RoundExDouble (const aTemp: Double): Double; inline;
154+
function RoundExInteger (const aTemp: Double): Integer; inline;
155155
var
156156
vTmp: Double;
157157
begin
158158
vTmp := aTemp * 10;
159-
Result := Ceiling (vTmp) / 10;
159+
Result := Ceiling (vTmp);
160160
end;
161161

162162
//---------------------------------------------------
@@ -473,8 +473,24 @@ procedure TOneBRC.MergeAll;
473473

474474
//---------------------------------------------------
475475

476+
function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
477+
begin
478+
Result := IntToStr(aIn);
479+
Insert ('.', Result, Length(Result));
480+
481+
if Result[1] = '.' then begin
482+
Insert ('0', Result, 1);
483+
exit;
484+
end;
485+
486+
if (Result[1] = '-') and (Result[2] = '.') then
487+
Insert('0', Result, 2);
488+
end;
489+
490+
//---------------------------------------------------
491+
476492
procedure TOneBRC.GenerateOutput;
477-
var vMin, vMean, vMax: Double;
493+
var vMean: Integer;
478494
vStream: TStringStream;
479495
I, N: Int64;
480496
vData: PStationData;
@@ -507,14 +523,12 @@ procedure TOneBRC.GenerateOutput;
507523
// debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it
508524
vHash := crc32c(0, @vStations[i][1], Length (vStations[i]));
509525
FStationsDicts[0].TryGetValue(vHash, vData);
510-
vMin := vData^.Min/10;
511-
vMax := vData^.Max/10;
512-
vMean := RoundExDouble(vData^.Sum/vData^.Count/10);
526+
vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
513527

514528
vStream.WriteString(
515-
vStations[i] + '=' + FormatFloat('0.0', vMin)
516-
+ '/' + FormatFloat('0.0', vMean)
517-
+ '/' + FormatFloat('0.0', vMax) + ', '
529+
vStations[i] + '=' + MyFormatInt(vData^.Min)
530+
+ '/' + MyFormatInt(vMean)
531+
+ '/' + MyFormatInt(vData^.Max) + ', '
518532
);
519533
Inc(I);
520534
end;
@@ -645,4 +659,3 @@ procedure TOneBRCApp.WriteHelp;
645659
end.
646660

647661
{$ENDREGION}
648-

entries/ghatem-fpc/src/OneBRC-smallrec.lpr

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ function Ceiling (const ANumber: Double): Integer; inline;
154154
Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0);
155155
end;
156156

157-
function RoundExDouble (const aTemp: Double): Double; inline;
157+
function RoundExInteger (const aTemp: Double): Integer; inline;
158158
var
159159
vTmp: Double;
160160
begin
161161
vTmp := aTemp * 10;
162-
Result := Ceiling (vTmp) / 10;
162+
Result := Ceiling (vTmp);
163163
end;
164164

165165
//---------------------------------------------------
@@ -487,8 +487,24 @@ procedure TOneBRC.MergeAll;
487487

488488
//---------------------------------------------------
489489

490+
function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
491+
begin
492+
Result := IntToStr(aIn);
493+
Insert ('.', Result, Length(Result));
494+
495+
if Result[1] = '.' then begin
496+
Insert ('0', Result, 1);
497+
exit;
498+
end;
499+
500+
if (Result[1] = '-') and (Result[2] = '.') then
501+
Insert('0', Result, 2);
502+
end;
503+
504+
//---------------------------------------------------
505+
490506
procedure TOneBRC.GenerateOutput;
491-
var vMin, vMean, vMax: Double;
507+
var vMean: Integer;
492508
vStream: TStringStream;
493509
I, N: Int64;
494510
vData: PStationData;
@@ -521,14 +537,12 @@ procedure TOneBRC.GenerateOutput;
521537
// debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it
522538
vHash := crc32c(0, @vStations[i][1], Length (vStations[i]));
523539
FStationsDicts[0].TryGetValue(vHash, vData);
524-
vMin := vData^.Min/10;
525-
vMax := vData^.Max/10;
526-
vMean := RoundExDouble(vData^.Sum/vData^.Count/10);
540+
vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
527541

528542
vStream.WriteString(
529-
vStations[i] + '=' + FormatFloat('0.0', vMin)
530-
+ '/' + FormatFloat('0.0', vMean)
531-
+ '/' + FormatFloat('0.0', vMax) + ', '
543+
vStations[i] + '=' + MyFormatInt(vData^.Min)
544+
+ '/' + MyFormatInt(vMean)
545+
+ '/' + MyFormatInt(vData^.Max) + ', '
532546
);
533547
Inc(I);
534548
end;
@@ -659,4 +673,3 @@ procedure TOneBRCApp.WriteHelp;
659673
end.
660674

661675
{$ENDREGION}
662-

entries/ghatem-fpc/src/OneBRC.lpr

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ function Ceiling (const ANumber: Double): Integer; inline;
154154
Result := Trunc(ANumber) + Ord(Frac(ANumber) > 0);
155155
end;
156156

157-
function RoundExDouble (const aTemp: Double): Double; inline;
157+
function RoundExInteger (const aTemp: Double): Integer; inline;
158158
var
159159
vTmp: Double;
160160
begin
161161
vTmp := aTemp * 10;
162-
Result := Ceiling (vTmp) / 10;
162+
Result := Ceiling (vTmp);
163163
end;
164164

165165
//---------------------------------------------------
@@ -487,8 +487,24 @@ procedure TOneBRC.MergeAll;
487487

488488
//---------------------------------------------------
489489

490+
function MyFormatInt (const aIn: SmallInt): AnsiString; inline;
491+
begin
492+
Result := IntToStr(aIn);
493+
Insert ('.', Result, Length(Result));
494+
495+
if Result[1] = '.' then begin
496+
Insert ('0', Result, 1);
497+
exit;
498+
end;
499+
500+
if (Result[1] = '-') and (Result[2] = '.') then
501+
Insert('0', Result, 2);
502+
end;
503+
504+
//---------------------------------------------------
505+
490506
procedure TOneBRC.GenerateOutput;
491-
var vMin, vMean, vMax: Double;
507+
var vMean: Integer;
492508
vStream: TStringStream;
493509
I, N: Int64;
494510
vData: PStationData;
@@ -521,14 +537,12 @@ procedure TOneBRC.GenerateOutput;
521537
// debatable, and the whole output generation is < 0.3 seconds, so not exactly worth it
522538
vHash := crc32c(0, @vStations[i][1], Length (vStations[i]));
523539
FStationsDicts[0].TryGetValue(vHash, vData);
524-
vMin := vData^.Min/10;
525-
vMax := vData^.Max/10;
526-
vMean := RoundExDouble(vData^.Sum/vData^.Count/10);
540+
vMean := RoundExInteger(vData^.Sum/vData^.Count/10);
527541

528542
vStream.WriteString(
529-
vStations[i] + '=' + FormatFloat('0.0', vMin)
530-
+ '/' + FormatFloat('0.0', vMean)
531-
+ '/' + FormatFloat('0.0', vMax) + ', '
543+
vStations[i] + '=' + MyFormatInt(vData^.Min)
544+
+ '/' + MyFormatInt(vMean)
545+
+ '/' + MyFormatInt(vData^.Max) + ', '
532546
);
533547
Inc(I);
534548
end;
@@ -659,4 +673,3 @@ procedure TOneBRCApp.WriteHelp;
659673
end.
660674

661675
{$ENDREGION}
662-

0 commit comments

Comments
 (0)