@@ -7,11 +7,12 @@ interface
77uses
88 Classes
99 , SysUtils
10- , Math
1110 , streamex
1211 , bufstream
1312 // , lgHashMap
1413 , generics.Collections
14+ , csvdocument
15+ , csvdataset
1516 { $IFDEF DEBUG}
1617 , Stopwatch
1718 { $ENDIF}
@@ -29,31 +30,36 @@ TStat = record
2930 sum: int64;
3031 cnt: int64;
3132 public
32- function ToString : ShortString ;
33+ function ToString : shortstring ;
3334 end ;
3435 { Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
3536 PStat = ^TStat;
3637
3738type
3839 // Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
39- TWeatherDictionaryLG = specialize TFastHashMap<ShortString, PStat>;
40+ // THashMap<shortstring, PStat> - takes around 120s.
41+ // TFastHash<shortstring, PStat> - takes around 100s.
42+ TWeatherDictionary = specialize TFastHashMap<shortstring, PStat>;
4043
4144type
4245 // a type for storing valid lookup temperature
43- TValidTemperatureDictionary = specialize TFastHashMap<ShortString , int64>;
46+ TValidTemperatureDictionary = specialize TFastHashMap<shortstring , int64>;
4447
4548type
4649 // Create a class to encapsulate the temperature observations of each weather station.
4750 TWeatherStation = class
4851 private
4952 fname: string;
50- weatherDictionary: TWeatherDictionaryLG ;
53+ weatherDictionary: TWeatherDictionary ;
5154 weatherStationList: TStringList;
5255 lookupStrFloatToIntList: TValidTemperatureDictionary;
5356 procedure CreateLookupTemp ;
5457 procedure ReadMeasurements ;
55- procedure ParseStationAndTemp (const line: ShortString);
56- procedure AddCityTemperatureLG (const cityName: ShortString; const newTemp: int64);
58+ procedure ReadMeasurementsBuffered ;
59+ procedure ReadMeasurementsV2 ;
60+ procedure ReadMeasurementsV3 ;
61+ procedure ParseStationAndTemp (const line: shortstring);
62+ procedure AddCityTemperatureLG (const cityName: shortstring; const newTemp: int64);
5763 procedure SortWeatherStationAndStats ;
5864 procedure PrintSortedWeatherStationAndStats ;
5965 public
@@ -92,7 +98,7 @@ function CustomTStringListComparer(AList: TStringList;
9298end ;
9399
94100// Remove dots from a string
95- function RemoveDots (const line: ShortString ): ShortString ;
101+ function RemoveDots (const line: shortstring ): shortstring ;
96102var
97103 index: integer;
98104begin
@@ -104,7 +110,7 @@ function RemoveDots(const line: ShortString): ShortString;
104110 end ;
105111end ;
106112
107- function TStat.ToString : ShortString ;
113+ function TStat.ToString : shortstring ;
108114var
109115 minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
110116begin
@@ -124,15 +130,15 @@ constructor TWeatherStation.Create(const filename: string);
124130 // Set expected capacity - saves 10 seconds.
125131 self.lookupStrFloatToIntList.Capacity := 44691 ;
126132 // Create a dictionary
127- weatherDictionary := TWeatherDictionaryLG .Create;
133+ weatherDictionary := TWeatherDictionary .Create;
128134 weatherDictionary.Capacity := 44691 ;
129135 // Create a TStringList for sorting
130136 weatherStationList := TStringList.Create;
131137end ;
132138
133139destructor TWeatherStation.Destroy;
134140var
135- stationName: ShortString ;
141+ stationName: shortstring ;
136142begin
137143
138144 // Free the lookup dictionary
@@ -206,7 +212,7 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;
206212
207213procedure TWeatherStation.SortWeatherStationAndStats ;
208214var
209- wsKey: ShortString ;
215+ wsKey: shortstring ;
210216begin
211217
212218 { $IFDEF DEBUG}
@@ -235,7 +241,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
235241 { $ENDIF DEBUG}
236242end ;
237243
238- procedure TWeatherStation.AddCityTemperatureLG (const cityName: ShortString ;
244+ procedure TWeatherStation.AddCityTemperatureLG (const cityName: shortstring ;
239245 const newTemp: int64);
240246var
241247 stat: PStat;
@@ -288,12 +294,15 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
288294 end ;
289295end ;
290296
291- procedure TWeatherStation.ParseStationAndTemp (const line: ShortString );
297+ procedure TWeatherStation.ParseStationAndTemp (const line: shortstring );
292298var
293299 delimiterPos: integer;
294- parsedStation, strFloatTemp: ShortString ;
295- parsedTemp, valCode : int64;
300+ strFloatTemp: shortstring ;
301+ parsedTemp: int64;
296302begin
303+
304+ if length(line) = 0 then Exit;
305+
297306 // Get position of the delimiter
298307 delimiterPos := Pos(' ;' , line);
299308 if delimiterPos > 0 then
@@ -314,6 +323,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
314323 end ;
315324end ;
316325
326+ { This approach turned out to be faster than the TCSVDocument method.}
317327procedure TWeatherStation.ReadMeasurements ;
318328var
319329 fileStream: TFileStream;
@@ -328,7 +338,117 @@ procedure TWeatherStation.ReadMeasurements;
328338 // Read and parse chunks of data until EOF -------------------------------
329339 while not streamReader.EOF do
330340 begin
331- // line := streamReader.ReadLine;
341+ // streamReader.ReadLine;
342+ // streamReader.ReadLine;
343+ self.ParseStationAndTemp(streamReader.ReadLine);
344+ self.ParseStationAndTemp(streamReader.ReadLine);
345+ end ;// End of read and parse chunks of data ------------------------------
346+ finally
347+ streamReader.Free;
348+ end ;
349+ finally
350+ // Close the file
351+ fileStream.Free;
352+ end ;
353+ end ;
354+
{ TCSVDocument method. Easiest to use, but about two times slower than
  ReadMeasurements.

  Loads the input file (self.fname) through a 64 KiB read buffer into a
  TCSVDocument using ';' as the delimiter. For every row, the text in
  column 1 is looked up in lookupStrFloatToIntList; when found, the mapped
  integer is passed to AddCityTemperatureLG together with the text in
  column 0. Rows whose column 1 text is not in the lookup are skipped.

  All three objects are released in reverse order of creation, and the file
  stream is freed inside its own finally block so the handle is not leaked
  when loading or parsing raises an exception. }
procedure TWeatherStation.ReadMeasurementsV2;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvReader: TCSVDocument;
  index, totalLines, parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream, 65536);
    try
      csvReader := TCSVDocument.Create;
      try
        csvReader.Delimiter := ';';
        csvReader.LoadFromStream(buffStream);

        totalLines := csvReader.RowCount;

        for index := 0 to totalLines - 1 do
        begin
          // Cells is addressed as [column, row].
          if self.lookupStrFloatToIntList.TryGetValue(csvReader.Cells[1, index],
            parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvReader.Cells[0, index], parsedTemp);
          end;
        end;

      finally
        csvReader.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously this call sat after
    // an empty finally block and was skipped whenever an exception escaped.
    fileStream.Free;
  end;
end;
394+
{ TCSVDataset method. About twice as slow as ReadMeasurements.

  Loads the input file (self.fname) through a TReadBufStream into a
  TCSVDataset using ';' as the delimiter, with the first line treated as
  data rather than field names. The dataset is then walked from the first
  record to EOF: the text of field 1 is looked up in
  lookupStrFloatToIntList and, when found, the mapped integer is passed to
  AddCityTemperatureLG together with the text of field 0. Records whose
  field 1 text is not in the lookup are skipped.

  All three objects are released in reverse order of creation, and the file
  stream is freed inside its own finally block so the handle is not leaked
  when loading or parsing raises an exception. }
procedure TWeatherStation.ReadMeasurementsV3;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvDataset: TCSVDataset;
  parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream);
    try
      csvDataset := TCSVDataset.Create(nil);
      try
        csvDataset.CSVOptions.Delimiter := ';';
        csvDataset.CSVOptions.FirstLineAsFieldNames := False;
        csvDataset.LoadFromCSVStream(buffStream);

        // Move to first record
        csvDataset.First;

        while not csvDataset.EOF do
        begin
          // WriteLn('Field1 is ', csvDataset.Fields[0].AsString, ' and Field2 is ', csvDataset.Fields[1].AsString);
          if self.lookupStrFloatToIntList.TryGetValue(csvDataset.Fields[1].AsString, parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvDataset.Fields[0].AsString, parsedTemp);
          end;
          csvDataset.Next;
        end;
      finally
        csvDataset.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously this call sat after
    // an empty finally block and was skipped whenever an exception escaped.
    fileStream.Free;
  end;
end;
435+
436+ { This approach is surprisingly 10 seconds slower than the first one.}
437+ procedure TWeatherStation.ReadMeasurementsBuffered ;
438+ var
439+ fileStream: TBufferedFileStream;
440+ streamReader: TStreamReader;
441+ begin
442+
443+ // Open the file for reading
444+ fileStream := TBufferedFileStream.Create(self.fname, fmOpenRead);
445+ try
446+ streamReader := TStreamReader.Create(fileStream);
447+ try
448+ // Read and parse chunks of data until EOF -------------------------------
449+ while not streamReader.EOF do
450+ begin
451+ self.ParseStationAndTemp(streamReader.ReadLine);
332452 self.ParseStationAndTemp(streamReader.ReadLine);
333453 end ;// End of read and parse chunks of data ------------------------------
334454 finally
@@ -340,11 +460,15 @@ procedure TWeatherStation.ReadMeasurements;
340460 end ;
341461end ;
342462
463+
{ The main algorithm.

  Runs the processing steps in a fixed order: CreateLookupTemp,
  ReadMeasurements, SortWeatherStationAndStats and finally
  PrintSortedWeatherStationAndStats. }
procedure TWeatherStation.ProcessMeasurements;
begin
  CreateLookupTemp;

  // Active reader. The alternative readers below are kept for comparison
  // and can be swapped in by moving the comment marker.
  ReadMeasurements;
  // ReadMeasurementsBuffered;
  // ReadMeasurementsV2;
  // ReadMeasurementsV3;

  SortWeatherStationAndStats;
  PrintSortedWeatherStationAndStats;
end;
0 commit comments