Skip to content

Commit 6612e4c

Browse files
Merge branch 'gcarreno:main' into main
2 parents 2195064 + ae54a7b commit 6612e4c

File tree

10 files changed

+1474
-964
lines changed

10 files changed

+1474
-964
lines changed

entries/bfire/src/MultiThreadUnit.pas

Lines changed: 1138 additions & 370 deletions
Large diffs are not rendered by default.

entries/bfire/src/ProcessByHashUnit.pas

Lines changed: 0 additions & 517 deletions
This file was deleted.

entries/bfire/src/README.md

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Brian Fire
2+
3+
An Entry to the One Billion Row Challenge in Object Pascal using Delphi 12 by [EagleAglow](https://github.com/EagleAglow), Discord: briar.on.fire
4+
5+
## Compiler
6+
7+
**Delphi 12** Professional Edition
8+
9+
### Dependencies
10+
11+
Project uses Delphi System units: `Classes`, `SysUtils`, `StrUtils`, `Diagnostics`,
12+
`Threading` and `SyncObjs`.
13+
14+
### Execution
15+
```
16+
Usage
17+
bfire -h | Write this help message and exit
18+
bfire -v | Write the version and exit
19+
bfire -i <file_1> | <file_1> contains Weather Data
20+
bfire -i <file_1> -o <file_2> | <file_1> contains Weather Data
21+
| <file_2> contains result
22+
If <file_2> is not defined, result goes to CONSOLE (STDOUT)
23+
```
24+
25+
#### Contest Mode
26+
27+
To run the challenge, read from the 'challenge.csv' file:
28+
29+
```console
30+
C:> bfire -i challenge.csv
31+
```
32+
33+
## Remarks
34+
35+
I haven't used Delphi very much recently, so I really needed to work on this as a refresher.
36+
I like TStringList self-sorting, but it is not as fast as other techniques.
37+
Now that this entry is set up, I can play with improvements. Maybe even get a time under 15 minutes! :)
38+
39+
Second version uses hash of station name to accumulate data and fill a TStringList for Unicode station name.
40+
The list is initially unsorted and has linked objects for records holding accumulated data for each station.
41+
Finally, the TStringList is sorted and used to output sorted data.
42+
43+
Third version has a thread for the console (which waits for tabulation, then sorts and writes results),
44+
two threads to read file, five threads to tabulate stations. Stations are grouped into five separate stacks,
45+
so each tabulation thread has roughly the same work load. File is read byte-wise into "classic" byte array
46+
for each file line ending in ASCII 10 (line feed). Each of these arrays is queued as a record in a last-in-first-out stack.
47+
Tabulation threads split the data into station name and temperature, then hash station name and use hash
48+
as index into one of five data arrays. After all data is read and tabulated, the five data arrays are added to an
49+
initially unsorted TStringList that holds unsorted Unicode station name and has linked pointers to
50+
tabulated data for each station. Finally, the TStringList is sorted, and the data is output.
51+
52+
## History
53+
54+
- Version 1.0: first working version, based on TStringList.
55+
- Version 1.1: modified rounding to new baseline.
56+
- Version 2.0: use hashing, sort later.
57+
- Version 2.1: minor speed tweaks.
58+
- Version 2.2: try hash functions modification.
59+
- Version 3.0: Six threads: one to read, four to tabulate, one (console) to rule them all...
60+
- Version 3.1: Safer locking strategy - didn't work.
61+
- Version 3.2: Eight threads: two to read, five to tabulate, one (console) to rule them all...
62+

entries/bfire/src/bfire.dpr

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ uses
1313
ConsoleUnit in 'ConsoleUnit.pas';
1414

1515
var
16-
UseStdOut: Boolean; // True unless output file is defined
1716
start: TDateTime; // for timing
1817

1918
begin
@@ -49,14 +48,16 @@ begin
4948

5049
if Not(UseStdOut) then // wait and report
5150
begin
52-
while Not(ReadFile_Done and ParseDataQ_Done1 and ParseDataQ_Done2 and
53-
ParseDataQ_Done3 and ParseDataQ_Done4) do
51+
// while Not(ReadFile_Done1 and ReadFile_Done2 and ParseDataQ_Done1 and
52+
// ParseDataQ_Done2 and ParseDataQ_Done3 and ParseDataQ_Done4 and
53+
// ParseDataQ_Done5 and ParseDataQ_Done6) do
54+
while Not(ReadFile_Done1 and ReadFile_Done2 and ParseData_Done) do
5455
begin
5556
Sleep(1000);
5657
WriteLn('Lines: ' + IntToStr(LineCount) + ' Stacks: ' +
5758
IntToStr(DataStackCount1) + ' / ' + IntToStr(DataStackCount2) +
5859
' / ' + IntToStr(DataStackCount3) + ' / ' +
59-
IntToStr(DataStackCount4));
60+
IntToStr(DataStackCount4) + ' / ' + IntToStr(DataStackCount5));
6061

6162
if DataStackCount1 > StackMax1 then
6263
StackMax1 := DataStackCount1;
@@ -66,28 +67,30 @@ begin
6667
StackMax3 := DataStackCount3;
6768
if DataStackCount4 > StackMax4 then
6869
StackMax4 := DataStackCount4;
69-
70-
if ReadFile_Done then
71-
WriteLn('Done reading file');
72-
if ParseDataQ_Done1 then
73-
WriteLn('Done with ParseDataQ1');
74-
if ParseDataQ_Done2 then
75-
WriteLn('Done with ParseDataQ2');
76-
if ParseDataQ_Done3 then
77-
WriteLn('Done with ParseDataQ3');
78-
if ParseDataQ_Done4 then
79-
WriteLn('Done with ParseDataQ4');
70+
if DataStackCount5 > StackMax5 then
71+
StackMax5 := DataStackCount5;
72+
73+
if ReadFile_Done1 then
74+
WriteLn('Done with reading thread 1');
75+
if ReadFile_Done2 then
76+
WriteLn('Done with reading thread 2');
77+
if ParseData_Done then
78+
WriteLn('Done with tabulating threads');
8079
end;
8180
end
8281
else // just wait
8382
begin
84-
while Not(ReadFile_Done and ParseDataQ_Done1 and ParseDataQ_Done2 and
85-
ParseDataQ_Done3 and ParseDataQ_Done4) do
83+
// while Not(ReadFile_Done1 and ReadFile_Done2 and ParseDataQ_Done1 and
84+
// ParseDataQ_Done2 and ParseDataQ_Done3 and ParseDataQ_Done4 and
85+
// ParseDataQ_Done5 and ParseDataQ_Done6) do
86+
while Not(ReadFile_Done1 and ReadFile_Done2 and ParseData_Done) do
8687
begin
8788
Sleep(100);
8889
end;
8990
end;
9091

92+
Challenge.Free;
93+
9194
SortArrays; // sort
9295
ArrayToFile(outputFilename, UseStdOut); // output
9396

@@ -97,7 +100,8 @@ begin
97100
Now - start)]));
98101

99102
WriteLn('Stack Max: ' + IntToStr(StackMax1) + '/' + IntToStr(StackMax2)
100-
+ '/' + IntToStr(StackMax3) + '/' + IntToStr(StackMax4));
103+
+ '/' + IntToStr(StackMax3) + '/' + IntToStr(StackMax4) + '/' +
104+
IntToStr(StackMax5));
101105

102106
WriteLn('Press ENTER to exit');
103107
readln;

entries/bfire/src/version.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
const
2-
cVersion = '3.1';
2+
cVersion = '3.2';

entries/hgrosser/README.md

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,33 @@
33
**1 billion row Challenge entry**
44

55
## Version
6-
Version 2.00 (first version with threads)
6+
Version 2.10 (version with threads, does NOT yet work for 400 stations or 5 billion rows)
77

88
## How to compile
99
The program was developed with FPC 3.2.2 and Lazarus 2.2.4
1010

11-
1brc.lpi + 1brc.pas = version without threads
11+
It uses package **mORMot 2** to compile, which you can download from https://github.com/synopse/mORMot2
1212

13-
1brc_th.lpi + 1brc_th.pas = version with threads
13+
Files to compile: **1brc_th.lpi** and **1brc_th.pas** = this version with threads
1414

1515
## How to start
1616
```
17-
Usage: <path to input file> <thread count> [<bit-width for hash-list> [buffer size in KB]]
18-
- thread count: allowed range = [1..32]
17+
Usage: <path to input file> <thread count> [bit-width for hash-list]
18+
- thread count: allowed range = [1..64]
1919
- bit-width for hash-list: sets the size of the hash list, e.g. '16' => 65536 entries,
20-
allowed range = [16..28], Default=18
21-
- buffer size in KB: allowed range = [1..2,000,000 KB], Default=128 KB
22-
Example: hgrosser measurements.txt 32 18 128
20+
allowed range = [16..28], Default=16
21+
Example: hgrosser measurements.txt 32 16
2322
```
24-
There are no switches like `-i` etc, only 2..4 values.
23+
There are no switches like `-i` etc, only 2 or 3 values.
2524

26-
### Optimizing the 2nd command line parameter
27-
This parameter sets the thread count. With my own old CPU I could only test 1..4 threads. Although I expect 32 threads to be the fastest, I would be very interested to see the results for 1, 4, 8, 16 and 32 threads. Please use for this test the defaults for the 3rd and 4th command line parameters.
28-
29-
### Optimizing the 3rd command line parameter
30-
In theory the program should run faster with greater bit-widths for the hash-list (because of less collisions). On the computer of Gus - without threads - 18 bits was the fastest. Please try the values from 16 to 20 again and use '32' for the 2nd command line parameter and the default for the 4th command line parameter.
31-
32-
### Optimizing the 4th command line parameter
33-
After the 2nd and 3rd command line parameters had been optimized, please try with them 64, 96, 128, 192 and 256 KB as the 4th command line parameter. Thanks a lot!
25+
No further optimizing is needed. Please use '32' as the 2nd command line parameter and '16' as the 3rd command line parameter.
3426

3527
## How the program works
36-
The Program works with multi threads.
28+
The Program works with threads.
3729

3830
To speed things up:
3931

40-
- the input file is read via procedure 'blockread' ...
41-
- into an AnsiString, so that function 'PosEx' can be used to parse it
32+
- the input file is read via **mORMot**'s type 'TMemoryMap'
4233
- to manage the city names, a self made hash-list is used
4334
- temperatures are stored as integers (multiplied by 10)
4435

@@ -50,4 +41,4 @@ To speed things up:
5041
- Version 1.60: hash-list optimized, some minor improvements, Conditional "noCR" added
5142
- Version 1.61: Conditional "noCR" constantly enabled => input files must NOT have CR's
5243
- Version 2.00: 1st version with threads
53-
44+
- Version 2.10: the input file is now read via **mORMot**'s type 'TMemoryMap', the hash code is now computed via **mORMot**'s crc32c-function, several small improvements

entries/hgrosser/src/1brc.lpi renamed to entries/hgrosser/src/1brc_old.lpi

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
<CompatibilityMode Value="True"/>
1212
</Flags>
1313
<SessionStorage Value="InIDEConfig"/>
14-
<Title Value="1brc"/>
14+
<Title Value="1brc_old"/>
1515
<UseAppBundle Value="False"/>
1616
<ResourceType Value="res"/>
1717
</General>
@@ -24,7 +24,7 @@
2424
<CompilerOptions>
2525
<Version Value="11"/>
2626
<Target>
27-
<Filename Value="../../../bin/hgrosser"/>
27+
<Filename Value="../../../bin/hgrosser_old"/>
2828
</Target>
2929
<SearchPaths>
3030
<IncludeFiles Value="$(ProjOutDir)"/>
@@ -59,7 +59,7 @@
5959
<CompilerOptions>
6060
<Version Value="11"/>
6161
<Target>
62-
<Filename Value="../../../bin/hgrosser"/>
62+
<Filename Value="../../../bin/hgrosser_old"/>
6363
</Target>
6464
<SearchPaths>
6565
<IncludeFiles Value="$(ProjOutDir)"/>
@@ -104,15 +104,15 @@
104104
</RunParams>
105105
<Units Count="1">
106106
<Unit0>
107-
<Filename Value="1brc.pas"/>
107+
<Filename Value="1brc_old.pas"/>
108108
<IsPartOfProject Value="True"/>
109109
</Unit0>
110110
</Units>
111111
</ProjectOptions>
112112
<CompilerOptions>
113113
<Version Value="11"/>
114114
<Target>
115-
<Filename Value="../../../bin/hgrosser"/>
115+
<Filename Value="../../../bin/hgrosser_old"/>
116116
</Target>
117117
<SearchPaths>
118118
<IncludeFiles Value="$(ProjOutDir)"/>
File renamed without changes.

entries/hgrosser/src/1brc_th.lpi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@
101101
</Mode0>
102102
</Modes>
103103
</RunParams>
104+
<RequiredPackages Count="1">
105+
<Item1>
106+
<PackageName Value="mormot2"/>
107+
</Item1>
108+
</RequiredPackages>
104109
<Units Count="1">
105110
<Unit0>
106111
<Filename Value="1brc_th.pas"/>
@@ -115,6 +120,7 @@
115120
</Target>
116121
<SearchPaths>
117122
<IncludeFiles Value="$(ProjOutDir)"/>
123+
<OtherUnitFiles Value="../../../baseline/Common"/>
118124
<UnitOutputDirectory Value="../../../bin/lib/$(TargetCPU)-$(TargetOS)"/>
119125
</SearchPaths>
120126
<Parsing>

0 commit comments

Comments
 (0)