@@ -15,14 +15,10 @@ interface
1515 { $ENDIF}
1616 ;
1717
18- const
19- cSeed: LongInt = 46668267 ; // '1BRC' in ASCII
20- cColdestTemp = -99.9 ;
21- cHottestTemp = 99.9 ;
22- cLineBreak = #13 #10 ;
23-
2418type
19+
2520 { TGenerator }
21+
2622 TGenerator = class (TObject)
2723 private
2824 FInputFile: String;
@@ -43,17 +39,24 @@ TGenerator = class(TObject)
4339 end ;
4440
// Type declarations guarded by IFNDEF FPC, i.e. compiled only when the FPC
// symbol is not defined:
//  - TStringArray: dynamic array of string; Generate declares its
//    stationArray / temperatureArray lookup tables with this type.
//  - TWriteBufStream: alias for the output stream class; this change
//    repoints it from TFileStream to TBufferedFileStream.
4541 { $IFNDEF FPC}
46-
4742 TStringArray = array of string;
48- TWriteBufStream = TFileStream ;
43+ TWriteBufStream = TBufferedFileStream ;
4944 { $ENDIF}
5045
5146implementation
5247
// Implementation-private tuning constants for the generator (moved out of
// the interface section by this change).
5348const
54- batchPercent = 10 ;
// Fixed random seed.
// NOTE(review): read as decimal ASCII codes, 46 66 82 67 spells '.BRC'
// ('1' is 49), so the trailing remark is not literally accurate. Presumably
// the value must stay as-is to keep generated output reproducible; confirm
// before touching it.
49+ cSeed: LongInt = 46668267 ; // '1BRC' in ASCII
// Temperature bounds. Not referenced in the visible part of this unit;
// presumably they match the table built in Generate (0.0 and +/-0.1 up to
// +/-99.9); confirm.
50+ cColdestTemp = -99.9 ;
51+ cHottestTemp = 99.9 ;
// Progress-bar granularity: Generate redraws the bar every
// floor(FLineCount * linesPercent / 100) generated lines.
52+ linesPercent = 10 ;
// Initial Capacity assigned to the FStationNames string list in Create.
53+ stationsCapacity = 50000 ;
// Number of generated lines accumulated in the chunk buffer before it is
// written to the output stream.
54+ chunkBatch = 10000 ;
// Length the chunk buffer string is pre-sized to with SetLength
// (20 * 1024 * 1024 elements).
55+ chunkCapacity = 20 * 1024 * 1024 ;
// CR LF terminator appended to every precomputed temperature string, i.e.
// the end-of-line of each generated record.
56+ lineEnding = #13 #10 ;
// Carriage return only, written after each progress bar; presumably so the
// next bar overwrites the same console line; confirm.
57+ lineBreak = #13 ;
5558
56- { TGenerator }
59+ { TGenerator }
5760
5861constructor TGenerator.Create(AInputFile, AOutputFile: String; ALineCount: Int64);
5962begin
@@ -62,8 +65,7 @@ constructor TGenerator.Create(AInputFile, AOutputFile: String; ALineCount: Int64
6265 FLineCount := ALineCount;
6366
6467 FStationNames := TStringList.Create;
65- FStationNames.Capacity := 50000 ;
66- // FStationNames.CaseSensitive:= False;
68+ FStationNames.Capacity := stationsCapacity;
6769 FStationNames.UseLocale := False;
6870 FStationNames.Duplicates := dupIgnore;
6971 FStationNames.Sorted := True;
@@ -81,8 +83,9 @@ procedure TGenerator.BuildStationNames;
8183 streamReader: TStreamReader;
8284 entry: String;
8385 count: Int64;
84- start, stop: { $IFDEF FPC } QWord { $ELSE } Int64{ $ENDIF } ;
86+ start, stop: Int64;
8587begin
88+ count := 0 ; // cannot initialize vars in declaration (delphi complains)
8689 WriteLn(' Building Weather Stations...' );
8790 // Load the Weather Station names
8891 if FileExists(FInputFile) then
@@ -100,7 +103,6 @@ procedure TGenerator.BuildStationNames;
100103 begin
101104 entry := entry.Split(' ;' )[0 ];
102105 FStationNames.Add(entry);
103- // WriteLn('Got: ', entry);
104106 Inc(count);
105107 end ;
106108 end ;
@@ -114,7 +116,6 @@ procedure TGenerator.BuildStationNames;
114116 begin
115117 entry := entry.Split([' ;' ])[0 ];
116118 FStationNames.Add(entry);
117- // WriteLn('Got: ', entry);
118119 Inc(count);
119120 end ;
120121 end ;
@@ -160,11 +161,11 @@ procedure TGenerator.Generate;
160161 stationId: Int64;
161162 randomTemp: Integer;
162163 randomTempStr: String[4 ];
163- outputFileStream: TFileStream;
164- outputBufWriter: TWriteBufStream;
165- line, randomTempFinal: String;
164+ outputFileStream: TBufferedFileStream;
165+ chunkLine, randomTempFinal: String;
166166 stationArray, temperatureArray: TStringArray;
167- i, stationsCount, temperaturesCount: Integer;
167+ LenStationArray, LenTemperatureArray: Array of Integer;
168+ chunkCount, chunkLen, stationsCount, temperaturesCount: Integer;
168169 start: TDateTime;
169170begin
170171 // Randomize sets this variable depending on the current time
@@ -174,23 +175,31 @@ procedure TGenerator.Generate;
174175 // Build list of station names
175176 BuildStationNames;
176177
177- outputFileStream := TFileStream .Create(FOutPutFile, fmCreate);
178+ outputFileStream := TBufferedFileStream .Create(FOutPutFile, fmCreate);
178179
179- progressBatch := floor(FLineCount * (batchPercent / 100 ));
180+ progressBatch := floor(FLineCount * (linesPercent / 100 ));
180181 start := Now;
181182
183+ // This is all paweld magic:
184+ // From here
182185 // based on code @domasz from lazarus forum, github: PascalVault
183186 stationsCount := FStationNames.count;
184187 SetLength(stationArray, stationsCount);
185- for i := 0 to stationsCount - 1 do
186- stationArray[i] := FStationNames[i];
188+ SetLength(LenStationArray, stationsCount);
189+ for index := 0 to stationsCount - 1 do
190+ begin
191+ stationArray[index] := FStationNames[index] + ' ;' ;
192+ LenStationArray[index] := Length(stationArray[index]);
193+ end ;
187194
188195 temperaturesCount := 1999 ;
189196 SetLength(temperatureArray, temperaturesCount);
190- temperatureArray[0 ] := ' 0.0' ;
191- for i := 1 to 999 do
197+ SetLength(LenTemperatureArray, temperaturesCount);
198+ temperatureArray[0 ] := ' 0.0' + lineEnding;
199+ LenTemperatureArray[0 ] := Length(temperatureArray[0 ]);
200+ for index := 1 to 999 do
192201 begin
193- randomTempStr := IntToStr(i );
202+ randomTempStr := IntToStr(index );
194203 case Ord(randomTempStr[0 ]) of
195204 1 :
196205 randomTempFinal := ' 0.' + randomTempStr;
@@ -202,59 +211,58 @@ procedure TGenerator.Generate;
202211 randomTempFinal := randomTempStr[1 ] + randomTempStr[2 ] + randomTempStr[3 ] + ' .' +
203212 randomTempStr[4 ];
204213 end ;
205- temperatureArray[i * 2 - 1 ] := randomTempFinal;
206- temperatureArray[i * 2 ] := ' -' + randomTempFinal;
214+ temperatureArray[index * 2 - 1 ] := randomTempFinal + lineEnding;
215+ LenTemperatureArray[index * 2 - 1 ] := Length(temperatureArray[index * 2 - 1 ]);
216+ temperatureArray[index * 2 ] := ' -' + randomTempFinal + lineEnding;
217+ LenTemperatureArray[index * 2 ] := LenTemperatureArray[index * 2 - 1 ] + 1 ;
207218 end ;
208- //
209219
210- line := ' ' ;
220+ chunkCount := chunkBatch;
221+ chunkLen := 0 ;
222+ SetLength(chunkLine, chunkCapacity);
223+ // To here
211224
212225 try
213- // outputBufWriter:= TWriteBufStream.Create(outputFileStream, 4*1024);
214- { $IFDEF FPC}
215- outputBufWriter := TWriteBufStream.Create(outputFileStream, 64 * 1024 );
216- { $ENDIF}
217- try
218- Write(GenerateProgressBar(1 , FLineCount, 50 , 0 , Now - start), cLineBreak);
219- // Generate the file
220- for index := 1 to FLineCount do
226+ // Print first state of the progress bar
227+ Write(GenerateProgressBar(1 , FLineCount, 50 , 0 , Now - start), lineBreak);
228+ // Generate the file
229+ for index := 1 to FLineCount do
230+ begin
231+ stationId := Random(stationsCount);
232+ // This is all paweld magic:
233+ // From here
234+ randomTemp := Random(temperaturesCount);
235+ Move(stationArray[stationId][1 ], chunkLine[chunkLen + 1 ],
236+ LenStationArray[stationId]);
237+ Inc(chunkLen, LenStationArray[stationId]);
238+ Move(temperatureArray[randomTemp][1 ], chunkLine[chunkLen + 1 ],
239+ LenTemperatureArray[randomTemp]);
240+ Inc(chunkLen, LenTemperatureArray[randomTemp]);
241+
242+ Dec(chunkCount);
243+ if chunkCount = 0 then
221244 begin
222- stationId := Random(stationsCount);
223- // This is all paweld magic:
224- // From here
225- randomTemp := Random(temperaturesCount);
226- line := line + stationArray[stationId] + ' ;' + temperatureArray[randomTemp] +
227- cLineBreak;
228- // Write(line);
229- if index mod 10000 = 0 then
230- begin
231- { $IFNDEF FPC}
232- outputFileStream.WriteBuffer(line[1 ], Length(line));
233- { $ELSE}
234- outputBufWriter.WriteBuffer(line[1 ], Length(line));
235- { $ENDIF}
236- line := ' ' ;
237- end ;
238- // To here
239- Dec(progressBatch);
240- if progressBatch = 0 then
241- begin
242- Write(GenerateProgressBar(index, FLineCount, 50 , outputFileStream.Size,
243- Now - start), cLineBreak);
244- progressBatch := floor(FLineCount * (batchPercent / 100 ));
245- end ;
245+ outputFileStream.WriteBuffer(chunkLine[1 ], chunkLen);
246+ chunkCount := chunkBatch;
247+ chunkLen := 0 ;
246248 end ;
247- if line <> ' ' then
249+ // To here
250+ Dec(progressBatch);
251+ if progressBatch = 0 then
248252 begin
249- { $IFNDEF FPC}
250- outputFileStream.WriteBuffer(line[1 ], Length(line));
251- { $ELSE}
252- outputBufWriter.WriteBuffer(line[1 ], Length(line));
253- { $ENDIF}
253+ Write(GenerateProgressBar(index, FLineCount, 50 , outputFileStream.Size,
254+ Now - start), lineBreak);
255+ progressBatch := floor(FLineCount * (linesPercent / 100 ));
254256 end ;
255- finally
257+ end ;
258+
259+ if chunkCount > 0 then
260+ begin
261+ outputFileStream.WriteBuffer(chunkLine[1 ], chunkLen);
256262 { $IFDEF FPC}
257- outputBufWriter.Free;
263+ outputFileStream.Flush;
264+ { $ELSE}
265+ outputFileStream.FlushBuffer;
258266 { $ENDIF}
259267 end ;
260268 finally
0 commit comments