Skip to content

Commit 736225c

Browse files
authored
Revert "[Serialization] Remove delta encoding optimization (llvm#145670)" (#11622)
This reverts commit e90ab0e. Removing the delta encoding optimization led to a 5% increase in PCH sizes, and builders ran out of disk space. Conflicts: clang/lib/Serialization/ASTWriter.cpp. Temporary workaround for rdar://162016610.
1 parent fb002b7 commit 736225c

File tree

8 files changed

+240
-54
lines changed

8 files changed

+240
-54
lines changed

clang/include/clang/Serialization/ASTReader.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,8 @@ class ASTReader
488488
using ModuleReverseIterator = ModuleManager::ModuleReverseIterator;
489489

490490
private:
491+
using LocSeq = SourceLocationSequence;
492+
491493
/// The receiver of some callbacks invoked by ASTReader.
492494
std::unique_ptr<ASTReaderListener> Listener;
493495

@@ -2450,16 +2452,18 @@ class ASTReader
24502452
/// Read a source location from raw form and return it in its
24512453
/// originating module file's source location space.
24522454
std::pair<SourceLocation, unsigned>
2453-
ReadUntranslatedSourceLocation(RawLocEncoding Raw) const {
2454-
return SourceLocationEncoding::decode(Raw);
2455+
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
2456+
LocSeq *Seq = nullptr) const {
2457+
return SourceLocationEncoding::decode(Raw, Seq);
24552458
}
24562459

24572460
/// Read a source location from raw form.
2458-
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw) const {
2461+
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
2462+
LocSeq *Seq = nullptr) const {
24592463
if (!MF.ModuleOffsetMap.empty())
24602464
ReadModuleOffsetMap(MF);
24612465

2462-
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw);
2466+
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
24632467
ModuleFile *OwningModuleFile =
24642468
ModuleFileIndex == 0 ? &MF : MF.TransitiveImports[ModuleFileIndex - 1];
24652469

@@ -2487,9 +2491,9 @@ class ASTReader
24872491

24882492
/// Read a source location.
24892493
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
2490-
const RecordDataImpl &Record,
2491-
unsigned &Idx) {
2492-
return ReadSourceLocation(ModuleFile, Record[Idx++]);
2494+
const RecordDataImpl &Record, unsigned &Idx,
2495+
LocSeq *Seq = nullptr) {
2496+
return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
24932497
}
24942498

24952499
/// Read a FileID.
@@ -2508,7 +2512,7 @@ class ASTReader
25082512

25092513
/// Read a source range.
25102514
SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record,
2511-
unsigned &Idx);
2515+
unsigned &Idx, LocSeq *Seq = nullptr);
25122516

25132517
static llvm::BitVector ReadBitVector(const RecordData &Record,
25142518
const StringRef Blob);

clang/include/clang/Serialization/ASTRecordReader.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class OMPChildren;
3232
class ASTRecordReader
3333
: public serialization::DataStreamBasicReader<ASTRecordReader> {
3434
using ModuleFile = serialization::ModuleFile;
35+
using LocSeq = SourceLocationSequence;
3536

3637
ASTReader *Reader;
3738
ModuleFile *F;
@@ -159,7 +160,7 @@ class ASTRecordReader
159160
TypeSourceInfo *readTypeSourceInfo();
160161

161162
/// Reads the location information for a type.
162-
void readTypeLoc(TypeLoc TL);
163+
void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
163164

164165
/// Map a local type ID within a given AST file to a global type ID.
165166
serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const {
@@ -292,13 +293,13 @@ class ASTRecordReader
292293
void readOpenACCRoutineDeclAttr(OpenACCRoutineDeclAttr *A);
293294

294295
/// Read a source location, advancing Idx.
295-
SourceLocation readSourceLocation() {
296-
return Reader->ReadSourceLocation(*F, Record, Idx);
296+
SourceLocation readSourceLocation(LocSeq *Seq = nullptr) {
297+
return Reader->ReadSourceLocation(*F, Record, Idx, Seq);
297298
}
298299

299300
/// Read a source range, advancing Idx.
300-
SourceRange readSourceRange() {
301-
return Reader->ReadSourceRange(*F, Record, Idx);
301+
SourceRange readSourceRange(LocSeq *Seq = nullptr) {
302+
return Reader->ReadSourceRange(*F, Record, Idx, Seq);
302303
}
303304

304305
/// Read an arbitrary constant value, advancing Idx.

clang/include/clang/Serialization/ASTRecordWriter.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class TypeLoc;
2929
/// An object for streaming information to a record.
3030
class ASTRecordWriter
3131
: public serialization::DataStreamBasicWriter<ASTRecordWriter> {
32+
using LocSeq = SourceLocationSequence;
3233

3334
ASTWriter *Writer;
3435
ASTWriter::RecordDataImpl *Record;
@@ -146,8 +147,8 @@ class ASTRecordWriter
146147
void AddFunctionDefinition(const FunctionDecl *FD);
147148

148149
/// Emit a source location.
149-
void AddSourceLocation(SourceLocation Loc) {
150-
return Writer->AddSourceLocation(Loc, *Record);
150+
void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) {
151+
return Writer->AddSourceLocation(Loc, *Record, Seq);
151152
}
152153
void writeSourceLocation(SourceLocation Loc) {
153154
AddSourceLocation(Loc);
@@ -174,8 +175,8 @@ class ASTRecordWriter
174175
}
175176

176177
/// Emit a source range.
177-
void AddSourceRange(SourceRange Range) {
178-
return Writer->AddSourceRange(Range, *Record);
178+
void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) {
179+
return Writer->AddSourceRange(Range, *Record, Seq);
179180
}
180181

181182
void writeBool(bool Value) {
@@ -245,7 +246,7 @@ class ASTRecordWriter
245246
void AddTypeSourceInfo(TypeSourceInfo *TInfo);
246247

247248
/// Emits source location information for a type. Does not emit the type.
248-
void AddTypeLoc(TypeLoc TL);
249+
void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
249250

250251
/// Emits a template argument location info.
251252
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ class ASTWriter : public ASTDeserializationListener,
115115
using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx,
116116
serialization::UnsafeQualTypeDenseMapInfo>;
117117

118+
using LocSeq = SourceLocationSequence;
119+
118120
/// The bitstream writer used to emit this precompiled header.
119121
llvm::BitstreamWriter &Stream;
120122

@@ -733,14 +735,16 @@ class ASTWriter : public ASTDeserializationListener,
733735
void AddFileID(FileID FID, RecordDataImpl &Record);
734736

735737
/// Emit a source location.
736-
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record);
738+
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
739+
LocSeq *Seq = nullptr);
737740

738741
/// Return the raw encodings for source locations.
739742
SourceLocationEncoding::RawLocEncoding
740-
getRawSourceLocationEncoding(SourceLocation Loc);
743+
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
741744

742745
/// Emit a source range.
743-
void AddSourceRange(SourceRange Range, RecordDataImpl &Record);
746+
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
747+
LocSeq *Seq = nullptr);
744748

745749
/// Emit a reference to an identifier.
746750
void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record);

clang/include/clang/Serialization/SourceLocationEncoding.h

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
2626
// space in case the index of the module file is 0.
2727
//
28+
// As a special case, if the module file index is 0, a sequence of locations
29+
// can be encoded by storing only differences between successive elements.
2830
//
2931
//===----------------------------------------------------------------------===//
3032

@@ -36,6 +38,7 @@
3638
#include <climits>
3739

3840
namespace clang {
41+
class SourceLocationSequence;
3942

4043
/// Serialized encoding of SourceLocations without context.
4144
/// Optimized to have small unsigned values (=> small after VBR encoding).
@@ -51,22 +54,119 @@ class SourceLocationEncoding {
5154
static UIntTy decodeRaw(UIntTy Raw) {
5255
return (Raw >> 1) | (Raw << (UIntBits - 1));
5356
}
57+
friend SourceLocationSequence;
5458

5559
public:
5660
using RawLocEncoding = uint64_t;
5761

5862
static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
59-
unsigned BaseModuleFileIndex);
60-
static std::pair<SourceLocation, unsigned> decode(RawLocEncoding);
63+
unsigned BaseModuleFileIndex,
64+
SourceLocationSequence * = nullptr);
65+
static std::pair<SourceLocation, unsigned>
66+
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
67+
};
68+
69+
/// Serialized encoding of a sequence of SourceLocations.
70+
///
71+
/// Optimized to produce small values when locations with the sequence are
72+
/// similar. Each element can be delta-encoded against the last nonzero element.
73+
///
74+
/// Sequences should be started by creating a SourceLocationSequence::State,
75+
/// and then passed around as SourceLocationSequence*. Example:
76+
///
77+
/// // establishes a sequence
78+
/// void EmitTopLevelThing() {
79+
/// SourceLocationSequence::State Seq;
80+
/// EmitContainedThing(Seq);
81+
/// EmitRecursiveThing(Seq);
82+
/// }
83+
///
84+
/// // optionally part of a sequence
85+
/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
86+
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
87+
/// }
88+
///
89+
/// // establishes a sequence if there isn't one already
90+
/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
91+
/// SourceLocationSequence::State Seq(ParentSeq);
92+
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
93+
/// EmitRecursiveThing(Seq);
94+
/// }
95+
///
96+
class SourceLocationSequence {
97+
using UIntTy = SourceLocation::UIntTy;
98+
using EncodedTy = uint64_t;
99+
constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
100+
static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
101+
102+
// Prev stores the rotated last nonzero location.
103+
UIntTy &Prev;
104+
105+
// Zig-zag encoding turns small signed integers into small unsigned integers.
106+
// 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
107+
static UIntTy zigZag(UIntTy V) {
108+
UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
109+
return Sign ^ (V << 1);
110+
}
111+
static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
112+
113+
SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
114+
115+
EncodedTy encodeRaw(UIntTy Raw) {
116+
if (Raw == 0)
117+
return 0;
118+
UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
119+
if (Prev == 0)
120+
return Prev = Rotated;
121+
UIntTy Delta = Rotated - Prev;
122+
Prev = Rotated;
123+
// Exactly one 33 bit value is possible! (1 << 32).
124+
// This is because we have two representations of zero: trivial & relative.
125+
return 1 + EncodedTy{zigZag(Delta)};
126+
}
127+
UIntTy decodeRaw(EncodedTy Encoded) {
128+
if (Encoded == 0)
129+
return 0;
130+
if (Prev == 0)
131+
return SourceLocationEncoding::decodeRaw(Prev = Encoded);
132+
return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
133+
}
134+
135+
public:
136+
SourceLocation decode(EncodedTy Encoded) {
137+
return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
138+
}
139+
EncodedTy encode(SourceLocation Loc) {
140+
return encodeRaw(Loc.getRawEncoding());
141+
}
142+
143+
class State;
144+
};
145+
146+
/// This object establishes a SourceLocationSequence.
147+
class SourceLocationSequence::State {
148+
UIntTy Prev = 0;
149+
SourceLocationSequence Seq;
150+
151+
public:
152+
// If Parent is provided and non-null, then this root becomes part of that
153+
// enclosing sequence instead of establishing a new one.
154+
State(SourceLocationSequence *Parent = nullptr)
155+
: Seq(Parent ? Parent->Prev : Prev) {}
156+
157+
// Implicit conversion for uniform use of roots vs propagated sequences.
158+
operator SourceLocationSequence *() { return &Seq; }
61159
};
62160

63161
inline SourceLocationEncoding::RawLocEncoding
64162
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
65-
unsigned BaseModuleFileIndex) {
163+
unsigned BaseModuleFileIndex,
164+
SourceLocationSequence *Seq) {
66165
// If the source location is a local source location, we can try to optimize
67166
// similar sequences by recording only the differences.
68167
if (!BaseOffset)
69-
return encodeRaw(Loc.getRawEncoding());
168+
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
169+
70170
if (Loc.isInvalid())
71171
return 0;
72172

@@ -83,11 +183,13 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
83183
return Encoded;
84184
}
85185
inline std::pair<SourceLocation, unsigned>
86-
SourceLocationEncoding::decode(RawLocEncoding Encoded) {
186+
SourceLocationEncoding::decode(RawLocEncoding Encoded,
187+
SourceLocationSequence *Seq) {
87188
unsigned ModuleFileIndex = Encoded >> 32;
88189

89190
if (!ModuleFileIndex)
90-
return {SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
191+
return {Seq ? Seq->decode(Encoded)
192+
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
91193
ModuleFileIndex};
92194

93195
Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);

clang/lib/Serialization/ASTReader.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,9 +1935,10 @@ bool ASTReader::ReadSLocEntry(int ID) {
19351935
}
19361936

19371937
case SM_SLOC_EXPANSION_ENTRY: {
1938-
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]);
1939-
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2]);
1940-
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3]);
1938+
LocSeq::State Seq;
1939+
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq);
1940+
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq);
1941+
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq);
19411942
SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd,
19421943
Record[5], Record[4], ID,
19431944
BaseOffset + Record[0]);
@@ -7192,10 +7193,13 @@ QualType ASTReader::readTypeRecord(TypeID ID) {
71927193
namespace clang {
71937194

71947195
class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
7196+
using LocSeq = SourceLocationSequence;
7197+
71957198
ASTRecordReader &Reader;
7199+
LocSeq *Seq;
71967200

7197-
SourceLocation readSourceLocation() { return Reader.readSourceLocation(); }
7198-
SourceRange readSourceRange() { return Reader.readSourceRange(); }
7201+
SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); }
7202+
SourceRange readSourceRange() { return Reader.readSourceRange(Seq); }
71997203

72007204
TypeSourceInfo *GetTypeSourceInfo() {
72017205
return Reader.readTypeSourceInfo();
@@ -7210,7 +7214,8 @@ class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
72107214
}
72117215

72127216
public:
7213-
TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {}
7217+
TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq)
7218+
: Reader(Reader), Seq(Seq) {}
72147219

72157220
// We want compile-time assurance that we've enumerated all of
72167221
// these, so unfortunately we have to declare them first, then
@@ -7585,8 +7590,9 @@ void TypeLocReader::VisitDependentBitIntTypeLoc(
75857590
TL.setNameLoc(readSourceLocation());
75867591
}
75877592

7588-
void ASTRecordReader::readTypeLoc(TypeLoc TL) {
7589-
TypeLocReader TLR(*this);
7593+
void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) {
7594+
LocSeq::State Seq(ParentSeq);
7595+
TypeLocReader TLR(*this, Seq);
75907596
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
75917597
TLR.Visit(TL);
75927598
}
@@ -10157,9 +10163,9 @@ ASTRecordReader::readNestedNameSpecifierLoc() {
1015710163
}
1015810164

1015910165
SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
10160-
unsigned &Idx) {
10161-
SourceLocation beg = ReadSourceLocation(F, Record, Idx);
10162-
SourceLocation end = ReadSourceLocation(F, Record, Idx);
10166+
unsigned &Idx, LocSeq *Seq) {
10167+
SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq);
10168+
SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq);
1016310169
return SourceRange(beg, end);
1016410170
}
1016510171

0 commit comments

Comments
 (0)