Skip to content

Commit 72b990d

Browse files
mtrofinronlieb
authored andcommitted
Unittests and usability for BitstreamWriter incremental flushing (llvm#92983)
- added unittests for the raw_fd_stream output case. - the `BitstreamWriter` ctor was confusing, the relationship between the buffer and the file stream wasn't clear and in fact there was a potential bug in `BitcodeWriter` in the mach-o case, because that code assumed in-buffer only serialization. The incremental flushing behavior of flushing at end of block boundaries was an implementation detail that meant serializers not using blocks (for example) would need to know to check the buffer and flush. The bug was latent - in the sense that, today, because the stream being passed was not a `raw_fd_stream`, incremental buffering never kicked in. The new design moves the responsibility of flushing to the `BitstreamWriter`, and makes it work with any `raw_ostream` (but incrementally flush only in the `raw_fd_stream` case). If the `raw_ostream` is over a buffer - i.e. a `raw_svector_stream` - then it's equivalent to today's buffer case. For all other `raw_ostream` cases, buffering is an implementation detail. In all cases, the buffer is flushed (well, in the buffer case, that's a moot statement). This simplifies the state and state transitions the user has to track: you have a raw_ostream -> BitstreamWrite in it -> destroy the writer => the bitstream is completely written in your raw_ostream. The "buffer" case and the "raw_fd_stream" case become optimizations rather than imposing state transition concerns to the user. Change-Id: I36015a1ca3b87c6c89f4aa51142b829e526a0737
1 parent 10e40cf commit 72b990d

File tree

7 files changed

+253
-84
lines changed

7 files changed

+253
-84
lines changed

llvm/include/llvm/Bitcode/BitcodeWriter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class Module;
3030
class raw_ostream;
3131

3232
class BitcodeWriter {
33-
SmallVectorImpl<char> &Buffer;
3433
std::unique_ptr<BitstreamWriter> Stream;
3534

3635
StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
@@ -47,7 +46,8 @@ class BitcodeWriter {
4746

4847
public:
4948
/// Create a BitcodeWriter that writes to Buffer.
50-
BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr);
49+
BitcodeWriter(SmallVectorImpl<char> &Buffer);
50+
BitcodeWriter(raw_ostream &FS);
5151

5252
~BitcodeWriter();
5353

llvm/include/llvm/Bitstream/BitstreamWriter.h

Lines changed: 120 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/SmallVector.h"
1919
#include "llvm/ADT/StringRef.h"
2020
#include "llvm/Bitstream/BitCodes.h"
21+
#include "llvm/Support/Casting.h"
2122
#include "llvm/Support/Endian.h"
2223
#include "llvm/Support/MathExtras.h"
2324
#include "llvm/Support/raw_ostream.h"
@@ -28,34 +29,44 @@
2829
namespace llvm {
2930

3031
class BitstreamWriter {
31-
/// Out - The buffer that keeps unflushed bytes.
32-
SmallVectorImpl<char> &Out;
33-
34-
/// FS - The file stream that Out flushes to. If FS is nullptr, it does not
35-
/// support read or seek, Out cannot be flushed until all data are written.
36-
raw_fd_stream *FS;
37-
38-
/// FlushThreshold - If FS is valid, this is the threshold (unit B) to flush
39-
/// FS.
32+
/// Owned buffer, used to init Buffer if the provided stream doesn't happen to
33+
/// be a buffer itself.
34+
SmallVector<char, 0> OwnBuffer;
35+
/// Internal buffer for unflushed bytes (unless there is no stream to flush
36+
/// to, case in which these are "the bytes"). The writer backpatches, so it is
37+
/// efficient to buffer.
38+
SmallVectorImpl<char> &Buffer;
39+
40+
/// FS - The file stream that Buffer flushes to. If FS is a raw_fd_stream, the
41+
/// writer will incrementally flush at subblock boundaries. Otherwise flushing
42+
/// will happen at the end of BitstreamWriter's lifetime.
43+
raw_ostream *const FS;
44+
45+
/// FlushThreshold - this is the threshold (unit B) to flush to FS, if FS is a
46+
/// raw_fd_stream.
4047
const uint64_t FlushThreshold;
4148

4249
/// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
43-
unsigned CurBit;
50+
unsigned CurBit = 0;
4451

4552
/// CurValue - The current value. Only bits < CurBit are valid.
46-
uint32_t CurValue;
53+
uint32_t CurValue = 0;
4754

4855
/// CurCodeSize - This is the declared size of code values used for the
4956
/// current block, in bits.
50-
unsigned CurCodeSize;
57+
unsigned CurCodeSize = 2;
5158

5259
/// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
5360
/// selected BLOCK ID.
54-
unsigned BlockInfoCurBID;
61+
unsigned BlockInfoCurBID = 0;
5562

5663
/// CurAbbrevs - Abbrevs installed at in this block.
5764
std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
5865

66+
// Support for retrieving a section of the output, for purposes such as
67+
// checksumming.
68+
std::optional<size_t> BlockFlushingStartPos;
69+
5970
struct Block {
6071
unsigned PrevCodeSize;
6172
size_t StartSizeWord;
@@ -77,47 +88,106 @@ class BitstreamWriter {
7788
void WriteWord(unsigned Value) {
7889
Value =
7990
support::endian::byte_swap<uint32_t, llvm::endianness::little>(Value);
80-
Out.append(reinterpret_cast<const char *>(&Value),
81-
reinterpret_cast<const char *>(&Value + 1));
91+
Buffer.append(reinterpret_cast<const char *>(&Value),
92+
reinterpret_cast<const char *>(&Value + 1));
8293
}
8394

84-
uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
95+
uint64_t GetNumOfFlushedBytes() const {
96+
return fdStream() ? fdStream()->tell() : 0;
97+
}
8598

86-
size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); }
99+
size_t GetBufferOffset() const {
100+
return Buffer.size() + GetNumOfFlushedBytes();
101+
}
87102

88103
size_t GetWordIndex() const {
89104
size_t Offset = GetBufferOffset();
90105
assert((Offset & 3) == 0 && "Not 32-bit aligned");
91106
return Offset / 4;
92107
}
93108

94-
/// If the related file stream supports reading, seeking and writing, flush
95-
/// the buffer if its size is above a threshold.
96-
void FlushToFile() {
97-
if (!FS)
109+
void flushAndClear() {
110+
assert(FS);
111+
assert(!Buffer.empty());
112+
assert(!BlockFlushingStartPos &&
113+
"a call to markAndBlockFlushing should have been paired with a "
114+
"call to getMarkedBufferAndResumeFlushing");
115+
FS->write(Buffer.data(), Buffer.size());
116+
Buffer.clear();
117+
}
118+
119+
/// If the related file stream is a raw_fd_stream, flush the buffer if its
120+
/// size is above a threshold. If \p OnClosing is true, flushing happens
121+
/// regardless of thresholds.
122+
void FlushToFile(bool OnClosing = false) {
123+
if (!FS || Buffer.empty())
98124
return;
99-
if (Out.size() < FlushThreshold)
125+
if (OnClosing)
126+
return flushAndClear();
127+
if (BlockFlushingStartPos)
100128
return;
101-
FS->write((char *)&Out.front(), Out.size());
102-
Out.clear();
129+
if (fdStream() && Buffer.size() > FlushThreshold)
130+
flushAndClear();
131+
}
132+
133+
raw_fd_stream *fdStream() { return dyn_cast_or_null<raw_fd_stream>(FS); }
134+
135+
const raw_fd_stream *fdStream() const {
136+
return dyn_cast_or_null<raw_fd_stream>(FS);
137+
}
138+
139+
SmallVectorImpl<char> &getInternalBufferFromStream(raw_ostream &OutStream) {
140+
if (auto *SV = dyn_cast<raw_svector_ostream>(&OutStream))
141+
return SV->buffer();
142+
return OwnBuffer;
103143
}
104144

105145
public:
106-
/// Create a BitstreamWriter that writes to Buffer \p O.
146+
/// Create a BitstreamWriter over a raw_ostream \p OutStream.
147+
/// If \p OutStream is a raw_svector_ostream, the BitstreamWriter will write
148+
/// directly to the latter's buffer. In all other cases, the BitstreamWriter
149+
/// will use an internal buffer and flush at the end of its lifetime.
107150
///
108-
/// \p FS is the file stream that \p O flushes to incrementally. If \p FS is
109-
/// null, \p O does not flush incrementially, but writes to disk at the end.
151+
/// In addition, if \p is a raw_fd_stream supporting seek, tell, and read
152+
/// (besides write), the BitstreamWriter will also flush incrementally, when a
153+
/// subblock is finished, and if the FlushThreshold is passed.
110154
///
111-
/// \p FlushThreshold is the threshold (unit M) to flush \p O if \p FS is
112-
/// valid. Flushing only occurs at (sub)block boundaries.
113-
BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
114-
uint32_t FlushThreshold = 512)
115-
: Out(O), FS(FS), FlushThreshold(uint64_t(FlushThreshold) << 20), CurBit(0),
116-
CurValue(0), CurCodeSize(2) {}
155+
/// NOTE: \p FlushThreshold's unit is MB.
156+
BitstreamWriter(raw_ostream &OutStream, uint32_t FlushThreshold = 512)
157+
: Buffer(getInternalBufferFromStream(OutStream)),
158+
FS(!isa<raw_svector_ostream>(OutStream) ? &OutStream : nullptr),
159+
FlushThreshold(uint64_t(FlushThreshold) << 20) {}
160+
161+
/// Convenience constructor for users that start with a vector - avoids
162+
/// needing to wrap it in a raw_svector_ostream.
163+
BitstreamWriter(SmallVectorImpl<char> &Buff)
164+
: Buffer(Buff), FS(nullptr), FlushThreshold(0) {}
117165

118166
~BitstreamWriter() {
119-
assert(CurBit == 0 && "Unflushed data remaining");
167+
FlushToWord();
120168
assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
169+
FlushToFile(/*OnClosing=*/true);
170+
}
171+
172+
/// For scenarios where the user wants to access a section of the stream to
173+
/// (for example) compute some checksum, disable flushing and remember the
174+
/// position in the internal buffer where that happened. Must be paired with a
175+
/// call to getMarkedBufferAndResumeFlushing.
176+
void markAndBlockFlushing() {
177+
assert(!BlockFlushingStartPos);
178+
BlockFlushingStartPos = Buffer.size();
179+
}
180+
181+
/// resumes flushing, but does not flush, and returns the section in the
182+
/// internal buffer starting from the position marked with
183+
/// markAndBlockFlushing. The return should be processed before any additional
184+
/// calls to this object, because those may cause a flush and invalidate the
185+
/// return.
186+
StringRef getMarkedBufferAndResumeFlushing() {
187+
assert(BlockFlushingStartPos);
188+
size_t Start = *BlockFlushingStartPos;
189+
BlockFlushingStartPos.reset();
190+
return {&Buffer[Start], Buffer.size() - Start};
121191
}
122192

123193
/// Retrieve the current position in the stream, in bits.
@@ -141,16 +211,19 @@ class BitstreamWriter {
141211
if (ByteNo >= NumOfFlushedBytes) {
142212
assert((!endian::readAtBitAlignment<uint8_t, llvm::endianness::little,
143213
unaligned>(
144-
&Out[ByteNo - NumOfFlushedBytes], StartBit)) &&
214+
&Buffer[ByteNo - NumOfFlushedBytes], StartBit)) &&
145215
"Expected to be patching over 0-value placeholders");
146216
endian::writeAtBitAlignment<uint8_t, llvm::endianness::little, unaligned>(
147-
&Out[ByteNo - NumOfFlushedBytes], NewByte, StartBit);
217+
&Buffer[ByteNo - NumOfFlushedBytes], NewByte, StartBit);
148218
return;
149219
}
150220

221+
// If we don't have a raw_fd_stream, GetNumOfFlushedBytes() should have
222+
// returned 0, and we shouldn't be here.
223+
assert(fdStream() != nullptr);
151224
// If the byte offset to backpatch is flushed, use seek to backfill data.
152225
// First, save the file position to restore later.
153-
uint64_t CurPos = FS->tell();
226+
uint64_t CurPos = fdStream()->tell();
154227

155228
// Copy data to update into Bytes from the file FS and the buffer Out.
156229
char Bytes[3]; // Use one more byte to silence a warning from Visual C++.
@@ -159,19 +232,19 @@ class BitstreamWriter {
159232
size_t BytesFromBuffer = BytesNum - BytesFromDisk;
160233

161234
// When unaligned, copy existing data into Bytes from the file FS and the
162-
// buffer Out so that it can be updated before writing. For debug builds
235+
// buffer Buffer so that it can be updated before writing. For debug builds
163236
// read bytes unconditionally in order to check that the existing value is 0
164237
// as expected.
165238
#ifdef NDEBUG
166239
if (StartBit)
167240
#endif
168241
{
169-
FS->seek(ByteNo);
170-
ssize_t BytesRead = FS->read(Bytes, BytesFromDisk);
242+
fdStream()->seek(ByteNo);
243+
ssize_t BytesRead = fdStream()->read(Bytes, BytesFromDisk);
171244
(void)BytesRead; // silence warning
172245
assert(BytesRead >= 0 && static_cast<size_t>(BytesRead) == BytesFromDisk);
173246
for (size_t i = 0; i < BytesFromBuffer; ++i)
174-
Bytes[BytesFromDisk + i] = Out[i];
247+
Bytes[BytesFromDisk + i] = Buffer[i];
175248
assert((!endian::readAtBitAlignment<uint8_t, llvm::endianness::little,
176249
unaligned>(Bytes, StartBit)) &&
177250
"Expected to be patching over 0-value placeholders");
@@ -182,13 +255,13 @@ class BitstreamWriter {
182255
Bytes, NewByte, StartBit);
183256

184257
// Copy updated data back to the file FS and the buffer Out.
185-
FS->seek(ByteNo);
186-
FS->write(Bytes, BytesFromDisk);
258+
fdStream()->seek(ByteNo);
259+
fdStream()->write(Bytes, BytesFromDisk);
187260
for (size_t i = 0; i < BytesFromBuffer; ++i)
188-
Out[i] = Bytes[BytesFromDisk + i];
261+
Buffer[i] = Bytes[BytesFromDisk + i];
189262

190263
// Restore the file position.
191-
FS->seek(CurPos);
264+
fdStream()->seek(CurPos);
192265
}
193266

194267
void BackpatchHalfWord(uint64_t BitNo, uint16_t Val) {
@@ -481,11 +554,11 @@ class BitstreamWriter {
481554

482555
// Emit literal bytes.
483556
assert(llvm::all_of(Bytes, [](UIntTy B) { return isUInt<8>(B); }));
484-
Out.append(Bytes.begin(), Bytes.end());
557+
Buffer.append(Bytes.begin(), Bytes.end());
485558

486559
// Align end to 32-bits.
487560
while (GetBufferOffset() & 3)
488-
Out.push_back(0);
561+
Buffer.push_back(0);
489562
}
490563
void emitBlob(StringRef Bytes, bool ShouldEmitSize = true) {
491564
emitBlob(ArrayRef((const uint8_t *)Bytes.data(), Bytes.size()),

llvm/include/llvm/ProfileData/PGOCtxProfWriter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class PGOCtxProfileWriter final {
7070
public:
7171
PGOCtxProfileWriter(raw_fd_stream &Out,
7272
std::optional<unsigned> VersionOverride = std::nullopt)
73-
: Writer(Buff, &Out, 0) {
73+
: Writer(Out, 0) {
7474
Writer.EnterSubblock(PGOCtxProfileBlockIDs::ProfileMetadataBlockID,
7575
CodeLen);
7676
const auto Version = VersionOverride ? *VersionOverride : CurrentVersion;

llvm/include/llvm/Support/raw_ostream.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class raw_ostream {
5555
enum class OStreamKind {
5656
OK_OStream,
5757
OK_FDStream,
58+
OK_SVecStream,
5859
};
5960

6061
private:
@@ -703,7 +704,11 @@ class raw_svector_ostream : public raw_pwrite_stream {
703704
///
704705
/// \param O The vector to write to; this should generally have at least 128
705706
/// bytes free to avoid any extraneous memory overhead.
706-
explicit raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
707+
explicit raw_svector_ostream(SmallVectorImpl<char> &O)
708+
: raw_pwrite_stream(false, raw_ostream::OStreamKind::OK_SVecStream),
709+
OS(O) {
710+
// FIXME: here and in a few other places, set directly to unbuffered in the
711+
// ctor.
707712
SetUnbuffered();
708713
}
709714

@@ -713,10 +718,13 @@ class raw_svector_ostream : public raw_pwrite_stream {
713718

714719
/// Return a StringRef for the vector contents.
715720
StringRef str() const { return StringRef(OS.data(), OS.size()); }
721+
SmallVectorImpl<char> &buffer() { return OS; }
716722

717723
void reserveExtraSpace(uint64_t ExtraSize) override {
718724
OS.reserve(tell() + ExtraSize);
719725
}
726+
727+
static bool classof(const raw_ostream *OS);
720728
};
721729

722730
/// A raw_ostream that discards all output.

0 commit comments

Comments
 (0)