Skip to content

Commit 2da8716

Browse files
authored
Merge pull request #37 from ikelaiah/ikelaiah-rev03
Update - Rev 3, 3-4 mins faster.
2 parents 276ee92 + 551ad62 commit 2da8716

File tree

3 files changed

+172
-11
lines changed

3 files changed

+172
-11
lines changed

entries/ikelaiah/README.md

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ The approach I implemented here is simplistic.
66

77
- Sequentially read the measurement file.
88
- Populate a `TDictionary` with station names, min, max, count and sum; without storing all the temperature measurements.
9-
- Avoided [StrUtils.SplitString](https://www.freepascal.org/docs-html/rtl/strutils/splitstring.html), used [`Pos()`](https://www.freepascal.org/docs-html/rtl/system/pos.html) and [`Copy()`](https://www.freepascal.org/docs-html/rtl/system/copy.html) instead.
9+
- Format output and sort in `TStringList`.
1010
- Use a custom comparer to sort the station and temperature statistics in a `TStringList`.
1111
- Use the rounding method as provided in the `baseline.lpr` (or the `README.md` from 1brc-ObjectPascal).
1212
- Display the sorted measurements using a simple `for` loop.
@@ -34,6 +34,30 @@ To time the execution, do the following.
3434
$ time ./OneBRC.exe -i your_measurements.txt
3535
```
3636

37+
### Save an output
38+
39+
```bash
40+
$ ./ikelaiah.exe -i measurements.txt > ikelaiah-output.txt
41+
```
42+
43+
### Verifying SHA256 output on Windows
44+
45+
Launch `git bash` (make sure you have Git for Windows installed).
46+
47+
Run `dos2unix` on your output. Windows `\r\n` line endings change the resulting `sha256` hash.
48+
49+
```bash
50+
$ dos2unix.exe ikelaiah-output.txt
51+
dos2unix: converting file ikelaiah-output.txt to Unix format...
52+
```
53+
54+
Run `sha256sum` on your output.
55+
56+
```bash
57+
$ sha256sum.exe ikelaiah-output.txt
58+
db3d79d31b50daa8c03a1e4f2025029cb137f9971aa04129d8bca004795ae524 *ikelaiah-output.txt
59+
```
60+
3761
## Help
3862

3963
To see flags, use `-h`.
@@ -54,7 +78,7 @@ Use `-v` to check version.
5478

5579
```bash
5680
$ ./OneBRC.exe -v
57-
OneBRC version 1.0
81+
OneBRC version 1.3
5882
```
5983

6084
## Authors
@@ -65,7 +89,18 @@ Iwan Kelaiah
6589
## Version History
6690

6791
* 1.0
68-
* Initial Release - Sequential approach.
92+
* Initial Release - Sequential approach.
93+
* `AssignFile` -> `Reset` -> Parse weather station and the recorded temperature with `TStringHelper.Split` -> `TDictionary` -> `TStringList` -> A `for` loop -> output.
94+
95+
* 1.2
96+
* Revision release - Sequential approach.
97+
* Replaced `TStringHelper.Split` with `Pos()` and `Copy()`. 2-3 mins faster for 1 billion rows.
98+
* Float now stored as Int64. 2-3 mins faster for 1 billion rows.
99+
* Applied baseline's rounding.
100+
101+
* 1.3
102+
* Revision release - Sequential approach.
103+
* Replaced `AssignFile()` and `Reset()` with `TFileStream` and `TStreamReader`. 3-4 mins faster for 1 billion rows.
69104

70105
## License
71106

entries/ikelaiah/src/OneBRC.lpr

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
WeatherStation;
3333

3434
const
35-
version = '1.0';
35+
version = '1.3';
3636

3737
type
3838

@@ -96,10 +96,16 @@ TOneBRC = class(TCustomApplication)
9696
Exit;
9797
end;
9898

99-
// Start the main algorithm
100-
WeatherStation.ProcessTempMeasurements(filename);
99+
// Start the main algorithm ////////////////////////////////////////////////
100+
try
101+
WeatherStation.ProcessTempMeasurementsV3(filename);
102+
except
103+
on E: Exception do
104+
WriteLn('Error: ' + E.Message);
105+
end;
106+
101107

102-
// stop program loop
108+
// Stop program loop ///////////////////////////////////////////////////////
103109
Terminate;
104110
end;
105111

entries/ikelaiah/src/weatherstation.pas

Lines changed: 124 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,19 @@ interface
77
Classes,
88
SysUtils,
99
Generics.Collections,
10-
Math
10+
Math,
11+
streamex
1112
{$IFDEF DEBUG}
12-
, Stopwatch
13+
, Stopwatch
1314
{$ENDIF}
1415
;
1516

17+
type
18+
// Pairs a weather station name with a single temperature value.
// NOTE(review): no code visible in this diff references this record -
// confirm it is actually used before relying on it.
TParsedData = record
19+
// Name of the weather station.
wsName: string;
20+
// Temperature as an integer.
// NOTE(review): presumably the reading with the decimal point removed
// (tenths of a degree), as done in ProcessTempMeasurementsV3 - confirm.
wsTemp: int64;
21+
end;
22+
1623
type
1724
// Create a record of temperature stats
1825
TStat = record
@@ -28,7 +35,6 @@ TStat = record
2835
constructor Create(const newMin: int64; const newMax: int64;
2936
const newSum: int64; const newCount: int64);
3037
function ToString: string;
31-
3238
end;
3339

3440
type
@@ -37,6 +43,7 @@ TStat = record
3743

3844
// The main algorithm to process the temp measurements from various weather stations
3945
procedure ProcessTempMeasurements(const filename: string);
46+
procedure ProcessTempMeasurementsV3(const filename: string);
4047

4148
implementation
4249

@@ -137,17 +144,26 @@ procedure AddCityTemperature(const cityName: string; const newTemp: int64;
137144

138145
// Update the stat of this city
139146
weatherDictionary.AddOrSetValue(cityName, stat);
147+
{$IFDEF DEBUG}
148+
// Display the line.
149+
// WriteLn('Updated: ', cityName);
150+
{$ENDIF DEBUG}
140151
end;
141152

142153
// If city name doesn't exist add a new entry
143154
if not weatherDictionary.ContainsKey(cityName) then
144155
begin
145156
weatherDictionary.Add(cityName, TStat.Create(newTemp, newTemp, newTemp, 1));
157+
{$IFDEF DEBUG}
158+
// Display the line.
159+
// WriteLn('Added: ', cityName);
160+
{$ENDIF DEBUG}
146161
end;
147162
end;
148163

149164
procedure ProcessTempMeasurements(const filename: string);
150165
var
166+
buf: array[0..2047] of char;
151167
wd: TWeatherDictionary;
152168
line, ws, strTemp: string;
153169
weatherStationList: TStringList;
@@ -164,6 +180,7 @@ procedure ProcessTempMeasurements(const filename: string);
164180

165181
// Read text file //////////////////////////////////////////////////////////
166182
AssignFile(textFile, filename);
183+
SetTextBuf(textFile, buf[0], sizeof(buf));
167184

168185
// Perform the read operation in a try..except block to handle errors gracefully
169186
try
@@ -197,7 +214,10 @@ procedure ProcessTempMeasurements(const filename: string);
197214

198215
// Add the weather station and the recorded temp (as int64) in the TDictionary
199216
Val(strTemp, intTemp, valCode);
200-
if valCode <> 0 then Continue;
217+
218+
if valCode <> 0 then
219+
Continue;
220+
201221
AddCityTemperature(ws, intTemp, wd);
202222
end;
203223
end; // end while loop reading line at a time
@@ -258,4 +278,104 @@ procedure ProcessTempMeasurements(const filename: string);
258278

259279
end;
260280

281+
// Reads the measurements file line by line through a TStreamReader,
// accumulates per-station statistics in a TWeatherDictionary, then writes the
// sorted result to standard output as a single '{name=stats, ...}' line.
//
// filename: path of the measurements file. Lines are expected to look like
//           '<station>;<temperature>'; lines without a ';' or whose
//           temperature does not parse as an integer (after the '.' is
//           removed) are skipped silently.
// Raises:   exceptions from opening or reading the file propagate to the
//           caller; every object created here is released first.
procedure ProcessTempMeasurementsV3(const filename: string);
var
  fReader: TStreamReader;
  fStream: TFileStream;
  line, parsedStation, strTemp, wsKey, outputList: string;
  parsedTemp: int64;
  delimiterPos, valCode, index: integer;
  wd: TWeatherDictionary;
  weatherStationList: TStringList;
begin
  // Local object references are not initialised automatically. Nil them so
  // the single finally section below can call Free on each one safely, no
  // matter which constructor raised.
  fReader := nil;
  fStream := nil;
  weatherStationList := nil;
  wd := nil;

  try
    // Create a city - weather dictionary
    wd := TWeatherDictionary.Create;
    // Create a city - weather TStringList for sorting
    weatherStationList := TStringList.Create;

    // Create a file stream, and use TStreamReader to read the contents.
    // fmShareDenyWrite: other processes may still read the file while it is
    // being processed, but may not write to it.
    fStream := TFileStream.Create(filename, fmOpenRead or fmShareDenyWrite);
    fReader := TStreamReader.Create(fStream);

    // Read and parse the input file /////////////////////////////////////////
    {$IFDEF DEBUG}
    // Start the timer here so the StopTimer after the loop measures the
    // read/parse phase.
    Stopwatch.StartTimer;
    {$ENDIF}

    while not fReader.EOF do
    begin
      line := fReader.ReadLine;

      // Get position of the delimiter
      delimiterPos := Pos(';', line);
      if delimiterPos > 0 then
      begin
        // Station name is everything before the delimiter. Copy + Pos is used
        // instead of a split helper; the author measured it as roughly
        // 3 minutes faster when processing 1 billion rows.
        parsedStation := Copy(line, 1, delimiterPos - 1);

        // Temperature is everything after the delimiter. Drop the '.' so the
        // value can be stored as an int64 instead of a float.
        strTemp := Copy(line, delimiterPos + 1, Length(line));
        strTemp := StringReplace(strTemp, '.', '', [rfReplaceAll]);

        // valCode = 0 means the whole string converted cleanly; anything else
        // marks a malformed temperature and the line is ignored.
        Val(strTemp, parsedTemp, valCode);
        if valCode = 0 then
          AddCityTemperature(parsedStation, parsedTemp, wd);
      end;
    end;

    {$IFDEF DEBUG}
    Stopwatch.StopTimer;
    WriteLn('Finished reading and parsing input file');
    Stopwatch.DisplayTimer;
    {$ENDIF}

    // Format and sort weather station by name and temp stat /////////////////
    {$IFDEF DEBUG}
    Stopwatch.StartTimer;
    {$ENDIF}

    wsKey := '';
    for wsKey in wd.Keys do
      weatherStationList.Add(wsKey + '=' + wd[wsKey].ToString + ', ');
    weatherStationList.CustomSort(@CustomTStringListComparer);

    {$IFDEF DEBUG}
    Stopwatch.StopTimer;
    WriteLn('Finished creating TStringList and sorted it');
    Stopwatch.DisplayTimer;
    {$ENDIF}

    // Print TStringList - sorted by weather station and temp stat ///////////
    {$IFDEF DEBUG}
    Stopwatch.StartTimer;
    {$ENDIF}

    outputList := '';
    for index := 0 to weatherStationList.Count - 1 do
      outputList := outputList + weatherStationList[index];

    // Every entry ends with ', '; trim the trailing one. Guarded so that an
    // input with no valid lines does not pass a negative length to SetLength.
    if Length(outputList) >= 2 then
      SetLength(outputList, Length(outputList) - 2);
    WriteLn('{', outputList, '}');

    {$IFDEF DEBUG}
    Stopwatch.StopTimer;
    WriteLn('Finished printing the sorted weather station and temperatures');
    Stopwatch.DisplayTimer;
    {$ENDIF}
  finally
    // Release in reverse order of creation: the reader goes before the
    // stream it reads from. Free is a no-op on nil references.
    fReader.Free;
    fStream.Free;
    weatherStationList.Free;
    wd.Free;
  end;
end;
380+
261381
end.

0 commit comments

Comments
 (0)