11unit Generate.Common;
22
3+ { $IFDEF FPC}
34{ $mode ObjFPC}{ $H+}
5+ { $ENDIF}
46
57interface
68
79uses
810 Classes
911, SysUtils
10- , streamex
12+ { $IFDEF FPC}
13+ { $ELSE}
14+ { $ENDIF}
1115;
1216
1317const
@@ -41,12 +45,21 @@ TGenerator = class(TObject)
4145implementation
4246
4347uses
44- Math,
45- bufstream
48+ Math
49+ { $IFDEF FPC}
50+ , streamex
51+ , bufstream
52+ { $ELSE}
53+ , System.Diagnostics
54+ { $ENDIF}
4655;
4756
4857const
49- batchPercent = 10 ;
58+ linesPercent = 10 ;
59+ stationsCapacity = 50000 ;
60+ chunkBatch = 10000 ;
61+ chunkCapacity = 20 * 1024 * 1024 ;
62+ lineEnding = #13 #10 ;
5063
5164{ TGenerator }
5265
@@ -61,8 +74,7 @@ constructor TGenerator.Create(
6174 FLineCount:= ALineCount;
6275
6376 FStationNames:= TStringList.Create;
64- FStationNames.Capacity:= 50000 ;
65- // FStationNames.CaseSensitive:= False;
77+ FStationNames.Capacity:= stationsCapacity;
6678 FStationNames.UseLocale:= False;
6779 FStationNames.Duplicates:= dupIgnore;
6880 FStationNames.Sorted:= True;
@@ -80,7 +92,7 @@ procedure TGenerator.BuildStationNames;
8092 streamReader: TStreamReader;
8193 entry: String;
8294 count: Int64 = 0 ;
83- start, stop: QWord ;
95+ start, stop: Int64 ;
8496begin
8597 WriteLn(' Building Weather Stations...' );
8698 // Load the Weather Station names
@@ -90,19 +102,27 @@ procedure TGenerator.BuildStationNames;
90102 try
91103 streamReader:= TStreamReader.Create(inputStream);
92104 try
105+ { $IFDEF FPC}
93106 start:= GetTickCount64;
94107 while not streamReader.Eof do
108+ { $ELSE}
109+ start := TStopwatch.GetTimeStamp;
110+ while not streamReader.EndOfStream do
111+ { $ENDIF}
95112 begin
96113 entry:= streamReader.ReadLine;
97114 if entry[1 ] <> ' #' then
98115 begin
99116 entry:= entry.Split(' ;' )[0 ];
100117 FStationNames.Add(entry);
101- // WriteLn('Got: ', entry);
102118 Inc(count);
103119 end ;
104120 end ;
121+ { $IFDEF FPC}
105122 stop:= GetTickCount64;
123+ { $ELSE}
124+ stop := TStopwatch.GetTimeStamp;
125+ { $ENDIF}
106126 finally
107127 streamReader.Free;
108128 end ;
@@ -149,12 +169,11 @@ procedure TGenerator.Generate;
149169 stationId: Int64;
150170 randomTemp: Integer;
151171 randomTempStr: String[4 ];
152- outputFileStream: TFileStream;
153- outputBufWriter: TWriteBufStream;
154- line, randomTempFinal: String;
172+ outputFileStream: TBufferedFileStream;
173+ chunkLine, randomTempFinal: String;
155174 stationArray, temperatureArray: TStringArray;
156175 LenStationArray, LenTemperatureArray: Array of Integer;
157- i, count, len , stationsCount, temperaturesCount: Integer;
176+ chunkCount, chunkLen , stationsCount, temperaturesCount: Integer;
158177 start: TDateTime;
159178begin
160179 // Randomize sets this variable depending on the current time
@@ -164,90 +183,88 @@ procedure TGenerator.Generate;
164183 // Build list of station names
165184 BuildStationNames;
166185
167- outputFileStream:= TFileStream .Create(FOutPutFile, fmCreate);
186+ outputFileStream:= TBufferedFileStream .Create(FOutPutFile, fmCreate);
168187
169- progressBatch:= floor(FLineCount * (batchPercent / 100 ));
188+ progressBatch:= floor(FLineCount * (linesPercent / 100 ));
170189 start:= Now;
171190
191+ // This is all paweld magic:
192+ // From here
172193 // based on code @domasz from lazarus forum, github: PascalVault
173194 stationsCount := FStationNames.Count;
174195 SetLength(stationArray, stationsCount);
175196 SetLength(LenStationArray, stationsCount);
176- for i := 0 to stationsCount - 1 do
197+ for index := 0 to stationsCount - 1 do
177198 begin
178- stationArray[i ] := FStationNames[i ] + ' ;' ;
179- LenStationArray[i ] := Length(stationArray[i ]);
199+ stationArray[index ] := FStationNames[index ] + ' ;' ;
200+ LenStationArray[index ] := Length(stationArray[index ]);
180201 end ;
181202
182203 temperaturesCount := 1999 ;
183204 SetLength(temperatureArray, temperaturesCount);
184205 SetLength(LenTemperatureArray, temperaturesCount);
185- temperatureArray[0 ] := ' 0.0' + # 13 # 10 ;
206+ temperatureArray[0 ] := ' 0.0' + lineEnding ;
186207 LenTemperatureArray[0 ] := Length(temperatureArray[0 ]);
187- for i := 1 to 999 do
208+ for index := 1 to 999 do
188209 begin
189- randomTempStr := IntToStr(i );
210+ randomTempStr := IntToStr(index );
190211 case Ord(randomTempStr[0 ]) of
191212 1 : randomTempFinal := ' 0.' + randomTempStr;
192213 2 : randomTempFinal := randomTempStr[1 ] + ' .' + randomTempStr[2 ];
193214 3 : randomTempFinal := randomTempStr[1 ] + randomTempStr[2 ] + ' .' + randomTempStr[3 ];
194215 4 : randomTempFinal := randomTempStr[1 ] + randomTempStr[2 ] + randomTempStr[3 ] + ' .' + randomTempStr[4 ];
195216 end ;
196- temperatureArray[i * 2 - 1 ] := randomTempFinal + # 13 # 10 ;
197- LenTemperatureArray[i * 2 - 1 ] := Length(temperatureArray[i * 2 - 1 ]);
198- temperatureArray[i * 2 ] := ' -' + randomTempFinal + # 13 # 10 ;
199- LenTemperatureArray[i * 2 ] := LenTemperatureArray[i * 2 - 1 ] + 1 ;
217+ temperatureArray[index * 2 - 1 ] := randomTempFinal + lineEnding ;
218+ LenTemperatureArray[index * 2 - 1 ] := Length(temperatureArray[index * 2 - 1 ]);
219+ temperatureArray[index * 2 ] := ' -' + randomTempFinal + lineEnding ;
220+ LenTemperatureArray[index * 2 ] := LenTemperatureArray[index * 2 - 1 ] + 1 ;
200221 end ;
201- //
202222
203- count := 0 ;
204- len := 0 ;
205- SetLength(line, 1024 * 1024 * 20 );
223+ chunkCount := chunkBatch;
224+ chunkLen := 0 ;
225+ SetLength(chunkLine, chunkCapacity);
226+ // To here
206227
207228 try
208- // outputBufWriter:= TWriteBufStream.Create(outputFileStream, 4*1024);
209- outputBufWriter:= TWriteBufStream.Create(outputFileStream, 64 *1024 );
210- try
211- Write(GenerateProgressBar(1 , FLineCount, 50 , 0 , Now - start), #13 );
212- // Generate the file
213- for index:= 1 to FLineCount do
214- begin
215- stationId:= Random(stationsCount);
216- // This is all paweld magic:
217- // From here
218- randomTemp:= Random(temperaturesCount);
219- Move(stationArray[stationId][1 ], line[len + 1 ], LenStationArray[stationId]);
220- Inc(len, LenStationArray[stationId]);
221- Move(temperatureArray[randomTemp][1 ], line[len + 1 ], LenTemperatureArray[randomTemp]);
222- Inc(len, LenTemperatureArray[randomTemp]);
229+ // Print first state of the progress bar
230+ Write(GenerateProgressBar(1 , FLineCount, 50 , 0 , Now - start), #13 );
231+ // Generate the file
232+ for index:= 1 to FLineCount do
233+ begin
234+ stationId:= Random(stationsCount);
235+ // This is all paweld magic:
236+ // From here
237+ randomTemp:= Random(temperaturesCount);
238+ Move(stationArray[stationId][1 ], chunkLine[chunkLen + 1 ], LenStationArray[stationId]);
239+ Inc(chunkLen, LenStationArray[stationId]);
240+ Move(temperatureArray[randomTemp][1 ], chunkLine[chunkLen + 1 ], LenTemperatureArray[randomTemp]);
241+ Inc(chunkLen, LenTemperatureArray[randomTemp]);
223242
224- Inc(count);
225- if count = 10000 then
226- begin
227- outputBufWriter.WriteBuffer(line[1 ], len);
228- count := 0 ;
229- len := 0 ;
230- end ;
231- // To here
232- Dec(progressBatch);
233- if progressBatch = 0 then
234- begin
235- Write(GenerateProgressBar(
236- index,
237- FLineCount,
238- 50 ,
239- outputFileStream.Size,
240- Now - start
241- ), #13 );
242- progressBatch:= floor(FLineCount * (batchPercent / 100 ));
243- end ;
243+ Dec(chunkCount);
244+ if chunkCount = 0 then
245+ begin
246+ outputFileStream.WriteBuffer(chunkLine[1 ], chunkLen);
247+ chunkCount := chunkBatch;
248+ chunkLen := 0 ;
244249 end ;
245- if count > 0 then
250+ // To here
251+ Dec(progressBatch);
252+ if progressBatch = 0 then
246253 begin
247- outputBufWriter.WriteBuffer(line[1 ], len);
254+ Write(GenerateProgressBar(
255+ index,
256+ FLineCount,
257+ 50 ,
258+ outputFileStream.Size,
259+ Now - start
260+ ), #13 );
261+ progressBatch:= floor(FLineCount * (linesPercent / 100 ));
248262 end ;
249- finally
250- outputBufWriter.Free;
263+ end ;
264+ if chunkCount > 0 then
265+ begin
266+ outputFileStream.WriteBuffer(chunkLine[1 ], chunkLen);
267+ outputFileStream.Flush;
251268 end ;
252269 finally
253270 WriteLn;
0 commit comments