Skip to content

Commit 5a28640

Browse files
authored
Merge pull request #164 from ikelaiah/ikelaiah-rev12
Ikelaiah rev12
2 parents 3bbfc71 + 2fa92d3 commit 5a28640

File tree

2 files changed

+151
-20
lines changed

2 files changed

+151
-20
lines changed

entries/ikelaiah/README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,11 @@ Iwan Kelaiah
145145
* Revision release - Sequential approach. 3-5 mins on my Inspiron 15 7510 laptop, around 2m55s (no improvement on speed).
146146
* Replaced `LGenerics` with `Generics.Collections` for the time being.
147147

148+
* 1.12
149+
* Revision release - Sequential approach. 2-5 mins on my Inspiron 15 7510 laptop, around 2m40s (small improvement on speed).
150+
* Called `TStreamReader.ReadLine` twice per iteration of the while loop. This saves approx. 5-10 seconds.
151+
* Updated the Acknowledgments section.
152+
148153
## License
149154

150155
This project is licensed under the MIT License - see the LICENSE.md file for details
@@ -164,10 +169,12 @@ Inspiration, code snippets, libraries, etc.
164169
- Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`.
165170
3. A.Koverdyaev (avk)
166171
- For the amazing [LGenerics](https://github.com/avk959/LGenerics) library.
167-
4. Székely Balázs.
172+
4. Benito van der Zander (benibela)
173+
- For providing the [Free Pascal Hashmaps Benchmark](https://www.benibela.de/fpc-map-benchmark_en.html).
174+
5. Székely Balázs.
168175
- Now I know what `Single` data type is!
169176
- I borrowed the custom `TStringList` comparer from the `baseline` program.
170-
5. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go.
177+
6. Shraddha Agrawal - https://www.bytesizego.com/blog/one-billion-row-challenge-go.
171178
- The advice for not storing measurements for each station in a data structure.
172-
6. Arman Hajisafi - https://arman-hs.github.io
179+
7. Arman Hajisafi - https://arman-hs.github.io
173180
- Encouragements and inspirations.

entries/ikelaiah/src/weatherstation.pas

Lines changed: 141 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@ interface
77
uses
88
Classes
99
, SysUtils
10-
, Math
1110
, streamex
1211
, bufstream
1312
//, lgHashMap
1413
, generics.Collections
14+
, csvdocument
15+
, csvdataset
1516
{$IFDEF DEBUG}
1617
, Stopwatch
1718
{$ENDIF}
@@ -29,31 +30,36 @@ TStat = record
2930
sum: int64;
3031
cnt: int64;
3132
public
32-
function ToString: ShortString;
33+
function ToString: shortstring;
3334
end;
3435
{Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
3536
PStat = ^TStat;
3637

3738
type
3839
// Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
39-
TWeatherDictionaryLG = specialize TFastHashMap<ShortString, PStat>;
40+
// THashMap<shortstring, PStat> - takes around 120s.
41+
// TFastHashMap<shortstring, PStat> - takes around 100s.
42+
TWeatherDictionary = specialize TFastHashMap<shortstring, PStat>;
4043

4144
type
4245
// a type for storing valid lookup temperature
43-
TValidTemperatureDictionary = specialize TFastHashMap<ShortString, int64>;
46+
TValidTemperatureDictionary = specialize TFastHashMap<shortstring, int64>;
4447

4548
type
4649
// Create a class to encapsulate the temperature observations of each weather station.
4750
TWeatherStation = class
4851
private
4952
fname: string;
50-
weatherDictionary: TWeatherDictionaryLG;
53+
weatherDictionary: TWeatherDictionary;
5154
weatherStationList: TStringList;
5255
lookupStrFloatToIntList: TValidTemperatureDictionary;
5356
procedure CreateLookupTemp;
5457
procedure ReadMeasurements;
55-
procedure ParseStationAndTemp(const line: ShortString);
56-
procedure AddCityTemperatureLG(const cityName: ShortString; const newTemp: int64);
58+
procedure ReadMeasurementsBuffered;
59+
procedure ReadMeasurementsV2;
60+
procedure ReadMeasurementsV3;
61+
procedure ParseStationAndTemp(const line: shortstring);
62+
procedure AddCityTemperatureLG(const cityName: shortstring; const newTemp: int64);
5763
procedure SortWeatherStationAndStats;
5864
procedure PrintSortedWeatherStationAndStats;
5965
public
@@ -92,7 +98,7 @@ function CustomTStringListComparer(AList: TStringList;
9298
end;
9399

94100
// Remove dots from a string
95-
function RemoveDots(const line: ShortString): ShortString;
101+
function RemoveDots(const line: shortstring): shortstring;
96102
var
97103
index: integer;
98104
begin
@@ -104,7 +110,7 @@ function RemoveDots(const line: ShortString): ShortString;
104110
end;
105111
end;
106112

107-
function TStat.ToString: ShortString;
113+
function TStat.ToString: shortstring;
108114
var
109115
minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
110116
begin
@@ -124,15 +130,15 @@ constructor TWeatherStation.Create(const filename: string);
124130
// Set expected capacity - saves 10 seconds.
125131
self.lookupStrFloatToIntList.Capacity := 44691;
126132
// Create a dictionary
127-
weatherDictionary := TWeatherDictionaryLG.Create;
133+
weatherDictionary := TWeatherDictionary.Create;
128134
weatherDictionary.Capacity := 44691;
129135
// Create a TStringList for sorting
130136
weatherStationList := TStringList.Create;
131137
end;
132138

133139
destructor TWeatherStation.Destroy;
134140
var
135-
stationName: ShortString;
141+
stationName: shortstring;
136142
begin
137143

138144
// Free the lookup dictionary
@@ -206,7 +212,7 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;
206212

207213
procedure TWeatherStation.SortWeatherStationAndStats;
208214
var
209-
wsKey: ShortString;
215+
wsKey: shortstring;
210216
begin
211217

212218
{$IFDEF DEBUG}
@@ -235,7 +241,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
235241
{$ENDIF DEBUG}
236242
end;
237243

238-
procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
244+
procedure TWeatherStation.AddCityTemperatureLG(const cityName: shortstring;
239245
const newTemp: int64);
240246
var
241247
stat: PStat;
@@ -288,12 +294,15 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
288294
end;
289295
end;
290296

291-
procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
297+
procedure TWeatherStation.ParseStationAndTemp(const line: shortstring);
292298
var
293299
delimiterPos: integer;
294-
parsedStation, strFloatTemp: ShortString;
295-
parsedTemp, valCode: int64;
300+
strFloatTemp: shortstring;
301+
parsedTemp: int64;
296302
begin
303+
304+
if length(line) = 0 then Exit;
305+
297306
// Get position of the delimiter
298307
delimiterPos := Pos(';', line);
299308
if delimiterPos > 0 then
@@ -314,6 +323,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
314323
end;
315324
end;
316325

326+
{This approach turned out to be faster than the TCSVDocument method.}
317327
procedure TWeatherStation.ReadMeasurements;
318328
var
319329
fileStream: TFileStream;
@@ -328,7 +338,117 @@ procedure TWeatherStation.ReadMeasurements;
328338
// Read and parse chunks of data until EOF -------------------------------
329339
while not streamReader.EOF do
330340
begin
331-
// line := streamReader.ReadLine;
341+
//streamReader.ReadLine;
342+
//streamReader.ReadLine;
343+
self.ParseStationAndTemp(streamReader.ReadLine);
344+
self.ParseStationAndTemp(streamReader.ReadLine);
345+
end;// End of read and parse chunks of data ------------------------------
346+
finally
347+
streamReader.Free;
348+
end;
349+
finally
350+
// Close the file
351+
fileStream.Free;
352+
end;
353+
end;
354+
355+
{TCSVDocument method. Easiest to use, but about 2 times slower than the
 first method (ReadMeasurements).

 Loads the whole file into a TCSVDocument using ';' as the delimiter, then
 walks every row: column 1 is looked up in lookupStrFloatToIntList and, when
 found, the resulting value is recorded for the station named in column 0 via
 AddCityTemperatureLG. Rows whose column 1 is not in the lookup are skipped.}
procedure TWeatherStation.ReadMeasurementsV2;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvReader: TCSVDocument;
  index, totalLines, parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream, 65536);
    try
      csvReader := TCSVDocument.Create;
      try
        csvReader.Delimiter := ';';
        csvReader.LoadFromStream(buffStream);

        totalLines := csvReader.RowCount;

        for index := 0 to totalLines - 1 do
        begin
          if self.lookupStrFloatToIntList.TryGetValue(csvReader.Cells[1, index],
            parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvReader.Cells[0, index], parsedTemp);
          end;
        end;

      finally
        csvReader.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Release the file handle inside the finally section so it is closed
    // even when loading or parsing raises an exception.
    fileStream.Free;
  end;
end;
394+
395+
{TCSVDataset method. About two times slower than the first method
 (ReadMeasurements).

 Loads the file into a TCSVDataset using ';' as the delimiter and without
 treating the first line as field names, then iterates record by record:
 field 1 is looked up in lookupStrFloatToIntList and, when found, the
 resulting value is recorded for the station named in field 0 via
 AddCityTemperatureLG. Records whose field 1 is not in the lookup are skipped.}
procedure TWeatherStation.ReadMeasurementsV3;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvDataset: TCSVDataset;
  parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream);
    try
      csvDataset := TCSVDataset.Create(nil);
      try
        csvDataset.CSVOptions.Delimiter := ';';
        csvDataset.CSVOptions.FirstLineAsFieldNames := False;
        csvDataset.LoadFromCSVStream(buffStream);

        // Move to first record
        csvDataset.First;

        while not csvDataset.EOF do
        begin
          if self.lookupStrFloatToIntList.TryGetValue(csvDataset.Fields[1].AsString, parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvDataset.Fields[0].AsString, parsedTemp);
          end;
          csvDataset.Next;
        end;
      finally
        csvDataset.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Release the file handle inside the finally section so it is closed
    // even when loading or parsing raises an exception.
    fileStream.Free;
  end;
end;
435+
436+
{This approach is surprisingly 10 seconds slower than the first one.}
437+
procedure TWeatherStation.ReadMeasurementsBuffered;
438+
var
439+
fileStream: TBufferedFileStream;
440+
streamReader: TStreamReader;
441+
begin
442+
443+
// Open the file for reading
444+
fileStream := TBufferedFileStream.Create(self.fname, fmOpenRead);
445+
try
446+
streamReader := TStreamReader.Create(fileStream);
447+
try
448+
// Read and parse chunks of data until EOF -------------------------------
449+
while not streamReader.EOF do
450+
begin
451+
self.ParseStationAndTemp(streamReader.ReadLine);
332452
self.ParseStationAndTemp(streamReader.ReadLine);
333453
end;// End of read and parse chunks of data ------------------------------
334454
finally
@@ -340,11 +460,15 @@ procedure TWeatherStation.ReadMeasurements;
340460
end;
341461
end;
342462

463+
343464
{ The main algorithm.
  Runs the processing steps in order: CreateLookupTemp, one of the
  ReadMeasurements* readers, SortWeatherStationAndStats and finally
  PrintSortedWeatherStationAndStats. }
procedure TWeatherStation.ProcessMeasurements;
begin
  CreateLookupTemp;

  // Active reader. The other readers are left here, disabled.
  ReadMeasurements;
  //ReadMeasurementsBuffered;
  //ReadMeasurementsV2;
  //ReadMeasurementsV3;

  SortWeatherStationAndStats;
  PrintSortedWeatherStationAndStats;
end;

0 commit comments

Comments
 (0)