1818#include " llvm/ADT/SmallVector.h"
1919#include " llvm/ADT/StringRef.h"
2020#include " llvm/Bitstream/BitCodes.h"
21+ #include " llvm/Support/Casting.h"
2122#include " llvm/Support/Endian.h"
2223#include " llvm/Support/MathExtras.h"
2324#include " llvm/Support/raw_ostream.h"
2829namespace llvm {
2930
3031class BitstreamWriter {
31- // / Out - The buffer that keeps unflushed bytes.
32- SmallVectorImpl<char > &Out;
33-
34- // / FS - The file stream that Out flushes to. If FS is nullptr, it does not
35- // / support read or seek, Out cannot be flushed until all data are written.
36- raw_fd_stream *FS;
37-
38- // / FlushThreshold - If FS is valid, this is the threshold (unit B) to flush
39- // / FS.
32+ // / Owned buffer, used to init Buffer if the provided stream doesn't happen to
33+ // / be a buffer itself.
34+ SmallVector<char , 0 > OwnBuffer;
35+ // / Internal buffer for unflushed bytes (unless there is no stream to flush
36+ // / to, in which case these are "the bytes"). The writer backpatches, so it is
37+ // / efficient to buffer.
38+ SmallVectorImpl<char > &Buffer;
39+
40+ // / FS - The file stream that Buffer flushes to. If FS is a raw_fd_stream, the
41+ // / writer will incrementally flush at subblock boundaries. Otherwise flushing
42+ // / will happen at the end of BitstreamWriter's lifetime.
43+ raw_ostream *const FS;
44+
45+ // / FlushThreshold - this is the threshold (unit B) to flush to FS, if FS is a
46+ // / raw_fd_stream.
4047 const uint64_t FlushThreshold;
4148
4249 // / CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
43- unsigned CurBit;
50+ unsigned CurBit = 0 ;
4451
4552 // / CurValue - The current value. Only bits < CurBit are valid.
46- uint32_t CurValue;
53+ uint32_t CurValue = 0 ;
4754
4855 // / CurCodeSize - This is the declared size of code values used for the
4956 // / current block, in bits.
50- unsigned CurCodeSize;
57+ unsigned CurCodeSize = 2 ;
5158
5259 // / BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
5360 // / selected BLOCK ID.
54- unsigned BlockInfoCurBID;
61+ unsigned BlockInfoCurBID = 0 ;
5562
5663 // / CurAbbrevs - Abbrevs installed in this block.
5764 std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
5865
66+ // Support for retrieving a section of the output, for purposes such as
67+ // checksumming.
68+ std::optional<size_t > BlockFlushingStartPos;
69+
5970 struct Block {
6071 unsigned PrevCodeSize;
6172 size_t StartSizeWord;
@@ -77,47 +88,106 @@ class BitstreamWriter {
7788 void WriteWord (unsigned Value) {
7889 Value =
7990 support::endian::byte_swap<uint32_t , llvm::endianness::little>(Value);
80- Out .append (reinterpret_cast <const char *>(&Value),
81- reinterpret_cast <const char *>(&Value + 1 ));
91+ Buffer .append (reinterpret_cast <const char *>(&Value),
92+ reinterpret_cast <const char *>(&Value + 1 ));
8293 }
8394
84- uint64_t GetNumOfFlushedBytes () const { return FS ? FS->tell () : 0 ; }
95+ uint64_t GetNumOfFlushedBytes () const {
96+ return fdStream () ? fdStream ()->tell () : 0 ;
97+ }
8598
86- size_t GetBufferOffset () const { return Out.size () + GetNumOfFlushedBytes (); }
99+ size_t GetBufferOffset () const {
100+ return Buffer.size () + GetNumOfFlushedBytes ();
101+ }
87102
88103 size_t GetWordIndex () const {
89104 size_t Offset = GetBufferOffset ();
90105 assert ((Offset & 3 ) == 0 && " Not 32-bit aligned" );
91106 return Offset / 4 ;
92107 }
93108
94- // / If the related file stream supports reading, seeking and writing, flush
95- // / the buffer if its size is above a threshold.
96- void FlushToFile () {
97- if (!FS)
109+ void flushAndClear () {
110+ assert (FS);
111+ assert (!Buffer.empty ());
112+ assert (!BlockFlushingStartPos &&
113+ " a call to markAndBlockFlushing should have been paired with a "
114+ " call to getMarkedBufferAndResumeFlushing" );
115+ FS->write (Buffer.data (), Buffer.size ());
116+ Buffer.clear ();
117+ }
118+
119+ // / If the related file stream is a raw_fd_stream, flush the buffer if its
120+ // / size is above a threshold. If \p OnClosing is true, flushing happens
121+ // / regardless of thresholds.
122+ void FlushToFile (bool OnClosing = false ) {
123+ if (!FS || Buffer.empty ())
98124 return ;
99- if (Out.size () < FlushThreshold)
125+ if (OnClosing)
126+ return flushAndClear ();
127+ if (BlockFlushingStartPos)
100128 return ;
101- FS->write ((char *)&Out.front (), Out.size ());
102- Out.clear ();
129+ if (fdStream () && Buffer.size () > FlushThreshold)
130+ flushAndClear ();
131+ }
132+
133+ raw_fd_stream *fdStream () { return dyn_cast_or_null<raw_fd_stream>(FS); }
134+
135+ const raw_fd_stream *fdStream () const {
136+ return dyn_cast_or_null<raw_fd_stream>(FS);
137+ }
138+
139+ SmallVectorImpl<char > &getInternalBufferFromStream (raw_ostream &OutStream) {
140+ if (auto *SV = dyn_cast<raw_svector_ostream>(&OutStream))
141+ return SV->buffer ();
142+ return OwnBuffer;
103143 }
104144
105145public:
106- // / Create a BitstreamWriter that writes to Buffer \p O.
146+ // / Create a BitstreamWriter over a raw_ostream \p OutStream.
147+ // / If \p OutStream is a raw_svector_ostream, the BitstreamWriter will write
148+ // / directly to the latter's buffer. In all other cases, the BitstreamWriter
149+ // / will use an internal buffer and flush at the end of its lifetime.
107150 // /
108- // / \p FS is the file stream that \p O flushes to incrementally. If \p FS is
109- // / null, \p O does not flush incrementially, but writes to disk at the end.
151+ // / In addition, if \p is a raw_fd_stream supporting seek, tell, and read
152+ // / (besides write), the BitstreamWriter will also flush incrementally, when a
153+ // / subblock is finished, and if the FlushThreshold is passed.
110154 // /
111- // / \p FlushThreshold is the threshold (unit M) to flush \p O if \p FS is
112- // / valid. Flushing only occurs at (sub)block boundaries.
113- BitstreamWriter (SmallVectorImpl<char > &O, raw_fd_stream *FS = nullptr ,
114- uint32_t FlushThreshold = 512 )
115- : Out(O), FS(FS), FlushThreshold(uint64_t (FlushThreshold) << 20 ), CurBit(0 ),
116- CurValue (0 ), CurCodeSize(2 ) {}
155+ // / NOTE: \p FlushThreshold's unit is MB.
156+ BitstreamWriter (raw_ostream &OutStream, uint32_t FlushThreshold = 512 )
157+ : Buffer(getInternalBufferFromStream(OutStream)),
158+ FS (!isa<raw_svector_ostream>(OutStream) ? &OutStream : nullptr),
159+ FlushThreshold(uint64_t (FlushThreshold) << 20) {}
160+
161+ // / Convenience constructor for users that start with a vector - avoids
162+ // / needing to wrap it in a raw_svector_ostream.
163+ BitstreamWriter (SmallVectorImpl<char > &Buff)
164+ : Buffer(Buff), FS(nullptr ), FlushThreshold(0 ) {}
117165
118166 ~BitstreamWriter () {
119- assert (CurBit == 0 && " Unflushed data remaining " );
167+ FlushToWord ( );
120168 assert (BlockScope.empty () && CurAbbrevs.empty () && " Block imbalance" );
169+ FlushToFile (/* OnClosing=*/ true );
170+ }
171+
172+ // / For scenarios where the user wants to access a section of the stream to
173+ // / (for example) compute some checksum, disable flushing and remember the
174+ // / position in the internal buffer where that happened. Must be paired with a
175+ // / call to getMarkedBufferAndResumeFlushing.
176+ void markAndBlockFlushing () {
177+ assert (!BlockFlushingStartPos);
178+ BlockFlushingStartPos = Buffer.size ();
179+ }
180+
181+ // / resumes flushing, but does not flush, and returns the section in the
182+ // / internal buffer starting from the position marked with
183+ // / markAndBlockFlushing. The return should be processed before any additional
184+ // / calls to this object, because those may cause a flush and invalidate the
185+ // / return.
186+ StringRef getMarkedBufferAndResumeFlushing () {
187+ assert (BlockFlushingStartPos);
188+ size_t Start = *BlockFlushingStartPos;
189+ BlockFlushingStartPos.reset ();
190+ return {&Buffer[Start], Buffer.size () - Start};
121191 }
122192
123193 // / Retrieve the current position in the stream, in bits.
@@ -141,16 +211,19 @@ class BitstreamWriter {
141211 if (ByteNo >= NumOfFlushedBytes) {
142212 assert ((!endian::readAtBitAlignment<uint8_t , llvm::endianness::little,
143213 unaligned>(
144- &Out [ByteNo - NumOfFlushedBytes], StartBit)) &&
214+ &Buffer [ByteNo - NumOfFlushedBytes], StartBit)) &&
145215 " Expected to be patching over 0-value placeholders" );
146216 endian::writeAtBitAlignment<uint8_t , llvm::endianness::little, unaligned>(
147- &Out [ByteNo - NumOfFlushedBytes], NewByte, StartBit);
217+ &Buffer [ByteNo - NumOfFlushedBytes], NewByte, StartBit);
148218 return ;
149219 }
150220
221+ // If we don't have a raw_fd_stream, GetNumOfFlushedBytes() should have
222+ // returned 0, and we shouldn't be here.
223+ assert (fdStream () != nullptr );
151224 // If the byte offset to backpatch is flushed, use seek to backfill data.
152225 // First, save the file position to restore later.
153- uint64_t CurPos = FS ->tell ();
226+ uint64_t CurPos = fdStream () ->tell ();
154227
155228 // Copy data to update into Bytes from the file FS and the buffer Buffer.
156229 char Bytes[3 ]; // Use one more byte to silence a warning from Visual C++.
@@ -159,19 +232,19 @@ class BitstreamWriter {
159232 size_t BytesFromBuffer = BytesNum - BytesFromDisk;
160233
161234 // When unaligned, copy existing data into Bytes from the file FS and the
162- // buffer Out so that it can be updated before writing. For debug builds
235+ // buffer Buffer so that it can be updated before writing. For debug builds
163236 // read bytes unconditionally in order to check that the existing value is 0
164237 // as expected.
165238#ifdef NDEBUG
166239 if (StartBit)
167240#endif
168241 {
169- FS ->seek (ByteNo);
170- ssize_t BytesRead = FS ->read (Bytes, BytesFromDisk);
242+ fdStream () ->seek (ByteNo);
243+ ssize_t BytesRead = fdStream () ->read (Bytes, BytesFromDisk);
171244 (void )BytesRead; // silence warning
172245 assert (BytesRead >= 0 && static_cast <size_t >(BytesRead) == BytesFromDisk);
173246 for (size_t i = 0 ; i < BytesFromBuffer; ++i)
174- Bytes[BytesFromDisk + i] = Out [i];
247+ Bytes[BytesFromDisk + i] = Buffer [i];
175248 assert ((!endian::readAtBitAlignment<uint8_t , llvm::endianness::little,
176249 unaligned>(Bytes, StartBit)) &&
177250 " Expected to be patching over 0-value placeholders" );
@@ -182,13 +255,13 @@ class BitstreamWriter {
182255 Bytes, NewByte, StartBit);
183256
183256 // Copy updated data back to the file FS and the buffer Buffer.
185- FS ->seek (ByteNo);
186- FS ->write (Bytes, BytesFromDisk);
258+ fdStream () ->seek (ByteNo);
259+ fdStream () ->write (Bytes, BytesFromDisk);
187260 for (size_t i = 0 ; i < BytesFromBuffer; ++i)
188- Out [i] = Bytes[BytesFromDisk + i];
261+ Buffer [i] = Bytes[BytesFromDisk + i];
189262
190263 // Restore the file position.
191- FS ->seek (CurPos);
264+ fdStream () ->seek (CurPos);
192265 }
193266
194267 void BackpatchHalfWord (uint64_t BitNo, uint16_t Val) {
@@ -481,11 +554,11 @@ class BitstreamWriter {
481554
482555 // Emit literal bytes.
483556 assert (llvm::all_of (Bytes, [](UIntTy B) { return isUInt<8 >(B); }));
484- Out .append (Bytes.begin (), Bytes.end ());
557+ Buffer .append (Bytes.begin (), Bytes.end ());
485558
486559 // Align end to 32-bits.
487560 while (GetBufferOffset () & 3 )
488- Out .push_back (0 );
561+ Buffer .push_back (0 );
489562 }
490563 void emitBlob (StringRef Bytes, bool ShouldEmitSize = true ) {
491564 emitBlob (ArrayRef ((const uint8_t *)Bytes.data (), Bytes.size ()),
0 commit comments