Skip to content

Commit 30a81a2

Browse files
committed
Update - Rev 12. Approx 5 seconds speed improvements.
1 parent 98b14b3 commit 30a81a2

File tree

1 file changed

+139
-17
lines changed

1 file changed

+139
-17
lines changed

entries/ikelaiah/src/weatherstation.pas

Lines changed: 139 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@ interface
77
uses
88
Classes
99
, SysUtils
10-
, Math
1110
, streamex
1211
, bufstream
1312
//, lgHashMap
1413
, generics.Collections
14+
, csvdocument
15+
, csvdataset
1516
{$IFDEF DEBUG}
1617
, Stopwatch
1718
{$ENDIF}
@@ -29,31 +30,34 @@ TStat = record
2930
sum: int64;
3031
cnt: int64;
3132
public
32-
function ToString: ShortString;
33+
function ToString: shortstring;
3334
end;
3435
{Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
3536
PStat = ^TStat;
3637

3738
type
3839
// Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
39-
TWeatherDictionaryLG = specialize TFastHashMap<ShortString, PStat>;
40+
TWeatherDictionary = specialize TFastHashMap<shortstring, PStat>;
4041

4142
type
4243
// a type for storing valid lookup temperature
43-
TValidTemperatureDictionary = specialize TFastHashMap<ShortString, int64>;
44+
TValidTemperatureDictionary = specialize TFastHashMap<shortstring, int64>;
4445

4546
type
4647
// Create a class to encapsulate the temperature observations of each weather station.
4748
TWeatherStation = class
4849
private
4950
fname: string;
50-
weatherDictionary: TWeatherDictionaryLG;
51+
weatherDictionary: TWeatherDictionary;
5152
weatherStationList: TStringList;
5253
lookupStrFloatToIntList: TValidTemperatureDictionary;
5354
procedure CreateLookupTemp;
5455
procedure ReadMeasurements;
55-
procedure ParseStationAndTemp(const line: ShortString);
56-
procedure AddCityTemperatureLG(const cityName: ShortString; const newTemp: int64);
56+
procedure ReadMeasurementsBuffered;
57+
procedure ReadMeasurementsV2;
58+
procedure ReadMeasurementsV3;
59+
procedure ParseStationAndTemp(const line: shortstring);
60+
procedure AddCityTemperatureLG(const cityName: shortstring; const newTemp: int64);
5761
procedure SortWeatherStationAndStats;
5862
procedure PrintSortedWeatherStationAndStats;
5963
public
@@ -92,7 +96,7 @@ function CustomTStringListComparer(AList: TStringList;
9296
end;
9397

9498
// Remove dots from a string
95-
function RemoveDots(const line: ShortString): ShortString;
99+
function RemoveDots(const line: shortstring): shortstring;
96100
var
97101
index: integer;
98102
begin
@@ -104,7 +108,7 @@ function RemoveDots(const line: ShortString): ShortString;
104108
end;
105109
end;
106110

107-
function TStat.ToString: ShortString;
111+
function TStat.ToString: shortstring;
108112
var
109113
minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
110114
begin
@@ -124,15 +128,15 @@ constructor TWeatherStation.Create(const filename: string);
124128
// Set expected capacity - saves 10 seconds.
125129
self.lookupStrFloatToIntList.Capacity := 44691;
126130
// Create a dictionary
127-
weatherDictionary := TWeatherDictionaryLG.Create;
131+
weatherDictionary := TWeatherDictionary.Create;
128132
weatherDictionary.Capacity := 44691;
129133
// Create a TStringList for sorting
130134
weatherStationList := TStringList.Create;
131135
end;
132136

133137
destructor TWeatherStation.Destroy;
134138
var
135-
stationName: ShortString;
139+
stationName: shortstring;
136140
begin
137141

138142
// Free the lookup dictionary
@@ -206,7 +210,7 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;
206210

207211
procedure TWeatherStation.SortWeatherStationAndStats;
208212
var
209-
wsKey: ShortString;
213+
wsKey: shortstring;
210214
begin
211215

212216
{$IFDEF DEBUG}
@@ -235,7 +239,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
235239
{$ENDIF DEBUG}
236240
end;
237241

238-
procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
242+
procedure TWeatherStation.AddCityTemperatureLG(const cityName: shortstring;
239243
const newTemp: int64);
240244
var
241245
stat: PStat;
@@ -288,12 +292,15 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
288292
end;
289293
end;
290294

291-
procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
295+
procedure TWeatherStation.ParseStationAndTemp(const line: shortstring);
292296
var
293297
delimiterPos: integer;
294-
parsedStation, strFloatTemp: ShortString;
295-
parsedTemp, valCode: int64;
298+
strFloatTemp: shortstring;
299+
parsedTemp: int64;
296300
begin
301+
302+
if length(line) = 0 then Exit;
303+
297304
// Get position of the delimiter
298305
delimiterPos := Pos(';', line);
299306
if delimiterPos > 0 then
@@ -314,6 +321,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
314321
end;
315322
end;
316323

324+
{This approach turned out to be faster than the TCSVDocument method.}
317325
procedure TWeatherStation.ReadMeasurements;
318326
var
319327
fileStream: TFileStream;
@@ -328,7 +336,117 @@ procedure TWeatherStation.ReadMeasurements;
328336
// Read and parse chunks of data until EOF -------------------------------
329337
while not streamReader.EOF do
330338
begin
331-
// line := streamReader.ReadLine;
339+
//streamReader.ReadLine;
340+
//streamReader.ReadLine;
341+
self.ParseStationAndTemp(streamReader.ReadLine);
342+
self.ParseStationAndTemp(streamReader.ReadLine);
343+
end;// End of read and parse chunks of data ------------------------------
344+
finally
345+
streamReader.Free;
346+
end;
347+
finally
348+
// Close the file
349+
fileStream.Free;
350+
end;
351+
end;
352+
353+
{TCSVDocument method. Easiest to use. About 2 times slower than the first method.}
354+
procedure TWeatherStation.ReadMeasurementsV2;
{ Loads the file named by self.fname into a TCSVDocument using ';' as the
  delimiter, then walks every row: column 1 is looked up in
  lookupStrFloatToIntList and, when found, the mapped value is passed to
  AddCityTemperatureLG together with column 0. Rows whose column 1 is not in
  the lookup are skipped.

  Every object created here is released in its own finally section, so the
  file stream is freed even when loading or aggregating raises. }
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvReader: TCSVDocument;
  index, totalLines, parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    // 64 KiB read buffer in front of the file stream
    buffStream := TReadBufStream.Create(fileStream, 65536);
    try
      csvReader := TCSVDocument.Create;
      try
        csvReader.Delimiter := ';';
        csvReader.LoadFromStream(buffStream);

        totalLines := csvReader.RowCount;

        for index := 0 to totalLines - 1 do
        begin
          if self.lookupStrFloatToIntList.TryGetValue(csvReader.Cells[1, index],
            parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvReader.Cells[0, index], parsedTemp);
          end;
        end;

      finally
        csvReader.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously this call sat after
    // an empty finally and was skipped whenever an exception propagated.
    fileStream.Free;
  end;
end;
392+
393+
{This method is twice as slow as the first one.}
394+
procedure TWeatherStation.ReadMeasurementsV3;
{ Loads the file named by self.fname into a TCSVDataset (';' delimiter, first
  line treated as data rather than field names) and iterates the records from
  First to EOF: field 1 is looked up in lookupStrFloatToIntList and, when
  found, the mapped value is passed to AddCityTemperatureLG together with
  field 0. Records whose field 1 is not in the lookup are skipped.

  Every object created here is released in its own finally section, so the
  file stream is freed even when loading or aggregating raises. }
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvDataset: TCSVDataset;
  parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream);
    try
      csvDataset := TCSVDataset.Create(nil);
      try
        csvDataset.CSVOptions.Delimiter := ';';
        csvDataset.CSVOptions.FirstLineAsFieldNames := False;
        csvDataset.LoadFromCSVStream(buffStream);

        // Move to first record
        csvDataset.First;

        while not csvDataset.EOF do
        begin
          if self.lookupStrFloatToIntList.TryGetValue(csvDataset.Fields[1].AsString, parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvDataset.Fields[0].AsString, parsedTemp);
          end;
          csvDataset.Next;
        end;
      finally
        csvDataset.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously this call sat after
    // an empty finally and was skipped whenever an exception propagated.
    fileStream.Free;
  end;
end;
433+
434+
{This approach is surprisingly 10 seconds slower than the first one.}
435+
procedure TWeatherStation.ReadMeasurementsBuffered;
436+
var
437+
fileStream: TBufferedFileStream;
438+
streamReader: TStreamReader;
439+
begin
440+
441+
// Open the file for reading
442+
fileStream := TBufferedFileStream.Create(self.fname, fmOpenRead);
443+
try
444+
streamReader := TStreamReader.Create(fileStream);
445+
try
446+
// Read and parse chunks of data until EOF -------------------------------
447+
while not streamReader.EOF do
448+
begin
449+
self.ParseStationAndTemp(streamReader.ReadLine);
332450
self.ParseStationAndTemp(streamReader.ReadLine);
333451
end;// End of read and parse chunks of data ------------------------------
334452
finally
@@ -340,11 +458,15 @@ procedure TWeatherStation.ReadMeasurements;
340458
end;
341459
end;
342460

461+
343462
// The main algorithm
344463
procedure TWeatherStation.ProcessMeasurements;
{ Drives the whole run by calling, in this fixed order:
  CreateLookupTemp, ReadMeasurements, SortWeatherStationAndStats and
  PrintSortedWeatherStationAndStats. }
begin
  CreateLookupTemp;

  // ReadMeasurements is the reader currently in use. The alternative readers
  // remain in the class but are not called from here:
  //   ReadMeasurementsBuffered
  //   ReadMeasurementsV2
  //   ReadMeasurementsV3
  ReadMeasurements;

  SortWeatherStationAndStats;
  PrintSortedWeatherStationAndStats;
end;

0 commit comments

Comments
 (0)