@@ -7,11 +7,12 @@ interface
77uses
88 Classes
99 , SysUtils
10- , Math
1110 , streamex
1211 , bufstream
1312 // , lgHashMap
1413 , generics.Collections
14+ , csvdocument
15+ , csvdataset
1516 { $IFDEF DEBUG}
1617 , Stopwatch
1718 { $ENDIF}
@@ -29,31 +30,34 @@ TStat = record
2930 sum: int64;
3031 cnt: int64;
3132 public
32- function ToString : ShortString ;
33+ function ToString : shortstring ;
3334 end ;
3435 { Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
3536 PStat = ^TStat;
3637
3738type
3839 // Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
39- TWeatherDictionaryLG = specialize TFastHashMap<ShortString , PStat>;
40+ TWeatherDictionary = specialize TFastHashMap<shortstring , PStat>;
4041
4142type
4243 // a type for storing valid lookup temperature
43- TValidTemperatureDictionary = specialize TFastHashMap<ShortString , int64>;
44+ TValidTemperatureDictionary = specialize TFastHashMap<shortstring , int64>;
4445
4546type
4647 // Create a class to encapsulate the temperature observations of each weather station.
4748 TWeatherStation = class
4849 private
4950 fname: string;
50- weatherDictionary: TWeatherDictionaryLG ;
51+ weatherDictionary: TWeatherDictionary ;
5152 weatherStationList: TStringList;
5253 lookupStrFloatToIntList: TValidTemperatureDictionary;
5354 procedure CreateLookupTemp ;
5455 procedure ReadMeasurements ;
55- procedure ParseStationAndTemp (const line: ShortString);
56- procedure AddCityTemperatureLG (const cityName: ShortString; const newTemp: int64);
56+ procedure ReadMeasurementsBuffered ;
57+ procedure ReadMeasurementsV2 ;
58+ procedure ReadMeasurementsV3 ;
59+ procedure ParseStationAndTemp (const line: shortstring);
60+ procedure AddCityTemperatureLG (const cityName: shortstring; const newTemp: int64);
5761 procedure SortWeatherStationAndStats ;
5862 procedure PrintSortedWeatherStationAndStats ;
5963 public
@@ -92,7 +96,7 @@ function CustomTStringListComparer(AList: TStringList;
9296end ;
9397
9498// Remove dots from a string
95- function RemoveDots (const line: ShortString ): ShortString ;
99+ function RemoveDots (const line: shortstring ): shortstring ;
96100var
97101 index: integer;
98102begin
@@ -104,7 +108,7 @@ function RemoveDots(const line: ShortString): ShortString;
104108 end ;
105109end ;
106110
107- function TStat.ToString : ShortString ;
111+ function TStat.ToString : shortstring ;
108112var
109113 minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
110114begin
@@ -124,15 +128,15 @@ constructor TWeatherStation.Create(const filename: string);
124128 // Set expected capacity - saves 10 seconds.
125129 self.lookupStrFloatToIntList.Capacity := 44691 ;
126130 // Create a dictionary
127- weatherDictionary := TWeatherDictionaryLG .Create;
131+ weatherDictionary := TWeatherDictionary .Create;
128132 weatherDictionary.Capacity := 44691 ;
129133 // Create a TStringList for sorting
130134 weatherStationList := TStringList.Create;
131135end ;
132136
133137destructor TWeatherStation.Destroy;
134138var
135- stationName: ShortString ;
139+ stationName: shortstring ;
136140begin
137141
138142 // Free the lookup dictionary
@@ -206,7 +210,7 @@ procedure TWeatherStation.PrintSortedWeatherStationAndStats;
206210
207211procedure TWeatherStation.SortWeatherStationAndStats ;
208212var
209- wsKey: ShortString ;
213+ wsKey: shortstring ;
210214begin
211215
212216 { $IFDEF DEBUG}
@@ -235,7 +239,7 @@ procedure TWeatherStation.SortWeatherStationAndStats;
235239 { $ENDIF DEBUG}
236240end ;
237241
238- procedure TWeatherStation.AddCityTemperatureLG (const cityName: ShortString ;
242+ procedure TWeatherStation.AddCityTemperatureLG (const cityName: shortstring ;
239243 const newTemp: int64);
240244var
241245 stat: PStat;
@@ -288,12 +292,15 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: ShortString;
288292 end ;
289293end ;
290294
291- procedure TWeatherStation.ParseStationAndTemp (const line: ShortString );
295+ procedure TWeatherStation.ParseStationAndTemp (const line: shortstring );
292296var
293297 delimiterPos: integer;
294- parsedStation, strFloatTemp: ShortString ;
295- parsedTemp, valCode : int64;
298+ strFloatTemp: shortstring ;
299+ parsedTemp: int64;
296300begin
301+
302+ if length(line) = 0 then Exit;
303+
297304 // Get position of the delimiter
298305 delimiterPos := Pos(' ;' , line);
299306 if delimiterPos > 0 then
@@ -314,6 +321,7 @@ procedure TWeatherStation.ParseStationAndTemp(const line: ShortString);
314321 end ;
315322end ;
316323
324+ { This approach turned out to be faster than the TCSVDocument method.}
317325procedure TWeatherStation.ReadMeasurements ;
318326var
319327 fileStream: TFileStream;
@@ -328,7 +336,117 @@ procedure TWeatherStation.ReadMeasurements;
328336 // Read and parse chunks of data until EOF -------------------------------
329337 while not streamReader.EOF do
330338 begin
331- // line := streamReader.ReadLine;
339+ // streamReader.ReadLine;
340+ // streamReader.ReadLine;
341+ self.ParseStationAndTemp(streamReader.ReadLine);
342+ self.ParseStationAndTemp(streamReader.ReadLine);
343+ end ;// End of read and parse chunks of data ------------------------------
344+ finally
345+ streamReader.Free;
346+ end ;
347+ finally
348+ // Close the file
349+ fileStream.Free;
350+ end ;
351+ end ;
352+
{ TCSVDocument method. Easiest to use, but (per the original author's
  measurements) about 2 times slower than ReadMeasurements.

  Loads the whole file named by self.fname into a TCSVDocument using ';' as
  the field delimiter, then walks every row: column 1 (the temperature text)
  is looked up in lookupStrFloatToIntList and, when found, the resulting
  integer is recorded for the station named in column 0 through
  AddCityTemperatureLG. Rows whose temperature text is not in the lookup
  table are skipped.

  All three resources are released in their own finally section, so the file
  stream is freed even when opening the buffer or loading the CSV raises. }
procedure TWeatherStation.ReadMeasurementsV2;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvReader: TCSVDocument;
  index, parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    // 64 KiB read buffer in front of the raw file stream.
    buffStream := TReadBufStream.Create(fileStream, 65536);
    try
      csvReader := TCSVDocument.Create;
      try
        csvReader.Delimiter := ';';
        csvReader.LoadFromStream(buffStream);

        // The upper bound of a Pascal for-loop is evaluated once, so RowCount
        // is not re-read on every iteration.
        for index := 0 to csvReader.RowCount - 1 do
        begin
          // Cells are addressed as [column, row].
          if self.lookupStrFloatToIntList.TryGetValue(csvReader.Cells[1, index],
            parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvReader.Cells[0, index], parsedTemp);
          end;
        end;
      finally
        csvReader.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously this call sat after
    // an empty finally and was skipped whenever an exception escaped.
    fileStream.Free;
  end;
end;
392+
{ TCSVDataset method. Per the original author's measurements this is about
  twice as slow as ReadMeasurements.

  Loads the file named by self.fname into a TCSVDataset (';' delimiter, first
  line treated as data rather than field names) and iterates the records from
  first to last: field 1 (the temperature text) is looked up in
  lookupStrFloatToIntList and, when found, the resulting integer is recorded
  for the station named in field 0 through AddCityTemperatureLG. Records whose
  temperature text is not in the lookup table are skipped.

  All three resources are released in their own finally section, so the file
  stream is freed even when creating the buffer or loading the CSV raises. }
procedure TWeatherStation.ReadMeasurementsV3;
var
  fileStream: TFileStream;
  buffStream: TReadBufStream;
  csvDataset: TCSVDataset;
  parsedTemp: int64;
begin
  fileStream := TFileStream.Create(self.fname, fmOpenRead);
  try
    buffStream := TReadBufStream.Create(fileStream);
    try
      csvDataset := TCSVDataset.Create(nil);
      try
        csvDataset.CSVOptions.Delimiter := ';';
        csvDataset.CSVOptions.FirstLineAsFieldNames := False;
        csvDataset.LoadFromCSVStream(buffStream);

        // Move to first record
        csvDataset.First;

        while not csvDataset.EOF do
        begin
          if self.lookupStrFloatToIntList.TryGetValue(
            csvDataset.Fields[1].AsString, parsedTemp) then
          begin
            self.AddCityTemperatureLG(csvDataset.Fields[0].AsString, parsedTemp);
          end;
          csvDataset.Next;
        end;
      finally
        csvDataset.Free;
      end;
    finally
      buffStream.Free;
    end;
  finally
    // Must live inside the finally section: previously this call sat after
    // an empty finally and was skipped whenever an exception escaped.
    fileStream.Free;
  end;
end;
433+
434+ { This approach is, surprisingly, 10 seconds slower than the first one.}
435+ procedure TWeatherStation.ReadMeasurementsBuffered ;
436+ var
437+ fileStream: TBufferedFileStream;
438+ streamReader: TStreamReader;
439+ begin
440+
441+ // Open the file for reading
442+ fileStream := TBufferedFileStream.Create(self.fname, fmOpenRead);
443+ try
444+ streamReader := TStreamReader.Create(fileStream);
445+ try
446+ // Read and parse chunks of data until EOF -------------------------------
447+ while not streamReader.EOF do
448+ begin
449+ self.ParseStationAndTemp(streamReader.ReadLine);
332450 self.ParseStationAndTemp(streamReader.ReadLine);
333451 end ;// End of read and parse chunks of data ------------------------------
334452 finally
@@ -340,11 +458,15 @@ procedure TWeatherStation.ReadMeasurements;
340458 end ;
341459end ;
342460
461+
// The main algorithm: build the temperature lookup table, read the input
// file, sort the collected stations and print them.
procedure TWeatherStation.ProcessMeasurements;
begin
  CreateLookupTemp;

  // Exactly one reader is active; the others are kept for comparison.
  ReadMeasurements;
  // ReadMeasurementsBuffered;
  // ReadMeasurementsV2;
  // ReadMeasurementsV3;

  SortWeatherStationAndStats;
  PrintSortedWeatherStationAndStats;
end;
0 commit comments