Skip to content

Commit 8c1e0c8

Browse files
authored
Merge pull request #85242 from allevato/deterministic-json-ast
[AST] Replace pointers in JSON output with deterministic IDs.
2 parents e32692d + 5fd1cbb commit 8c1e0c8

File tree

2 files changed

+97
-13
lines changed

2 files changed

+97
-13
lines changed

lib/AST/ASTDumper.cpp

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -680,13 +680,23 @@ static StringRef getDumpString(StringRef s) {
680680
static unsigned getDumpString(unsigned value) {
681681
return value;
682682
}
683-
static size_t getDumpString(size_t value) {
684-
return value;
685-
}
686-
static void *getDumpString(void *value) { return value; }
683+
static size_t getDumpString(size_t value) { return value; }
687684

688685
static StringRef getDumpString(Identifier ident) { return ident.str(); }
689686

687+
// If you are reading this comment because a compiler error directed you
688+
// here, it's probably because you have tried to pass a pointer directly
689+
// into a function like `printField`. Please do not do this -- it makes
690+
// the output nondeterministic. For the default S-expression output this
691+
// is not typically an issue (as it is used mainly for debugging), but
692+
// this is particularly problematic for the JSON format since build
693+
// systems may want to cache those outputs based on the content hash.
694+
//
695+
// Please call `printPointerField` instead. The output format will be
696+
// the same for the default formatter, but the JSON formatter will
697+
// replace those pointers with unique deterministic identifiers.
698+
static void *getDumpString(void *value) = delete;
699+
690700
//===----------------------------------------------------------------------===//
691701
// Decl printing.
692702
//===----------------------------------------------------------------------===//
@@ -813,6 +823,10 @@ namespace {
813823
virtual void printSourceRange(const SourceRange R, const ASTContext *Ctx,
814824
Label label) = 0;
815825

826+
/// Prints the given pointer to an output stream, transforming the pointer
827+
/// if necessary to make it safe for the output format.
828+
virtual void printPointerToStream(void *pointer, llvm::raw_ostream &OS) = 0;
829+
816830
/// Indicates whether the output format is meant to be parsable. Parsable
817831
/// output should use structure rather than stringification to convey
818832
/// detailed information, and generally provides more information than the
@@ -906,6 +920,12 @@ namespace {
906920
}, label, RangeColor);
907921
}
908922

923+
/// Writes \p pointer to \p OS without any substitution.
///
/// The default S-expression format keeps the real address because that is
/// useful when dumping AST nodes in the debugger.
void printPointerToStream(void *pointer, llvm::raw_ostream &OS) override {
  const void *rawAddress = pointer;
  OS << rawAddress;
}
928+
909929
bool isParsable() const override { return false; }
910930
};
911931

@@ -914,8 +934,12 @@ namespace {
914934
llvm::json::OStream OS;
915935
std::vector<bool> InObjectStack;
916936

937+
llvm::DenseMap<void *, int> DeterministicPointerIDs;
938+
int NextPointerID;
939+
917940
public:
918-
JSONWriter(raw_ostream &os, unsigned indent = 0) : OS(os, indent) {}
941+
JSONWriter(raw_ostream &os, unsigned indent = 0)
942+
: OS(os, indent), NextPointerID(1) {}
919943

920944
void printRecArbitrary(std::function<void(Label)> body,
921945
Label label) override {
@@ -1016,6 +1040,21 @@ namespace {
10161040
OS.attributeEnd();
10171041
}
10181042

1043+
/// Writes a deterministic stand-in for \p pointer to \p OS instead of the
/// address itself.
///
/// JSON output may be used by build systems that want deterministic output
/// for caching purposes, so raw addresses must not appear in it. The first
/// time a pointer is seen it is assigned the next sequential ID; every later
/// occurrence of the same pointer reuses that ID.
///
/// Note: the \p OS parameter is the destination stream and shadows this
/// writer's own JSON stream member of the same name.
void printPointerToStream(void *pointer, llvm::raw_ostream &OS) override {
  // One try_emplace both finds an existing ID and, when the pointer is new,
  // records the next one -- avoiding a separate find followed by an insert.
  auto [entry, isNew] =
      DeterministicPointerIDs.try_emplace(pointer, NextPointerID);
  if (isNew)
    ++NextPointerID;
  OS << "replaced-pointer-" << entry->second;
}
1057+
10191058
bool isParsable() const override { return true; }
10201059
};
10211060

@@ -1495,6 +1534,18 @@ namespace {
14951534
label, color);
14961535
}
14971536

1537+
/// Print a field that is a bare pointer as a short keyword-style value.
1538+
/// For parsable output formats that need to be deterministic, each
1539+
/// pointer will be replaced by a unique ID. The same pointer will be
1540+
/// mapped to the same ID, so such nodes can still be related to each
1541+
/// other across the tree.
1542+
void printPointerField(void *value, Label label,
1543+
TerminalColor color = FieldLabelColor) {
1544+
printFieldRaw(
1545+
[&](raw_ostream &OS) { Writer.printPointerToStream(value, OS); },
1546+
label, color);
1547+
}
1548+
14981549
/// Print a field with a long value that will be automatically quoted and
14991550
/// escaped, printing the value by passing a closure that takes a
15001551
/// \c raw_ostream.
@@ -1668,8 +1719,8 @@ namespace {
16681719

16691720
template <typename T>
16701721
void printDeclContext(const T *D) {
1671-
printField(static_cast<void *>(D->getDeclContext()),
1672-
Label::always("decl_context"));
1722+
printPointerField(static_cast<void *>(D->getDeclContext()),
1723+
Label::always("decl_context"));
16731724
}
16741725

16751726
/// Prints a field containing the name or the USR (based on parsability of
@@ -2704,8 +2755,8 @@ namespace {
27042755
printHead("pattern_entry", FieldLabelColor, label);
27052756

27062757
if (PBD->getInitContext(idx))
2707-
printField(PBD->getInitContext(idx),
2708-
Label::always("init_context"));
2758+
printPointerField(PBD->getInitContext(idx),
2759+
Label::always("init_context"));
27092760

27102761
printRec(PBD->getPattern(idx), Label::optional("pattern"));
27112762
if (PBD->getOriginalInit(idx)) {
@@ -4158,8 +4209,7 @@ class PrintExpr : public ExprVisitor<PrintExpr, void, Label>,
41584209

41594210
void visitOpaqueValueExpr(OpaqueValueExpr *E, Label label) {
41604211
printCommon(E, "opaque_value_expr", label);
4161-
printNameRaw([&](raw_ostream &OS) { OS << (void*)E; },
4162-
Label::optional("identity"));
4212+
printPointerField(static_cast<void *>(E), Label::optional("identity"));
41634213
printFoot();
41644214
}
41654215

@@ -5163,7 +5213,7 @@ class PrintAttribute : public AttributeVisitor<PrintAttribute, void, Label>,
51635213
void visitCustomAttr(CustomAttr *Attr, Label label) {
51645214
printCommon(Attr, "custom_attr", label);
51655215

5166-
printField(
5216+
printPointerField(
51675217
static_cast<void *>(static_cast<DeclContext *>(Attr->getInitContext())),
51685218
Label::always("init_context"));
51695219

@@ -6323,7 +6373,7 @@ namespace {
63236373
Label label) {
63246374
printCommon(className, label);
63256375

6326-
printField(static_cast<void *>(T), Label::always("address"));
6376+
printPointerField(static_cast<void *>(T), Label::always("address"));
63276377
printFlag(T->requiresClass(), "class");
63286378
if (auto layout = T->getLayoutConstraint()) {
63296379
printFieldRaw([&](raw_ostream &OS) {
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: %target-swift-frontend -target %target-swift-5.9-abi-triple -swift-version 6 -parse-as-library -dump-ast -dump-ast-format json %s -module-name main -o - > %t/main.json
3+
// RUN: %{python} -c 'import json, sys; print(json.dumps(json.load(sys.stdin), indent=4))' < %t/main.json | %FileCheck %s
4+
5+
// CHECK: "_kind": "func_decl",
6+
// CHECK: "decl_context": "replaced-pointer-[[FILE_ID:[0-9]+]]",
7+
// CHECK: "name": "f"
8+
func f() {
9+
// CHECK: "_kind": "pattern_binding_decl",
10+
// CHECK: "decl_context": "replaced-pointer-[[F_ID:[0-9]+]]",
11+
// CHECK: "name": "x"
12+
// CHECK: "_kind": "var_decl",
13+
// CHECK: "decl_context": "replaced-pointer-[[F_ID]]",
14+
var x = 0
15+
16+
// CHECK: "_kind": "pattern_binding_decl",
17+
// CHECK: "decl_context": "replaced-pointer-[[F_ID]]",
18+
// CHECK: "name": "y"
19+
// CHECK: "_kind": "var_decl",
20+
// CHECK: "decl_context": "replaced-pointer-[[F_ID]]",
21+
var y = 0
22+
}
23+
24+
// CHECK: "_kind": "func_decl",
25+
// CHECK: "decl_context": "replaced-pointer-[[FILE_ID]]",
26+
// CHECK: "name": "g"
27+
func g() {
28+
// CHECK: "_kind": "pattern_binding_decl",
29+
// CHECK: "decl_context": "replaced-pointer-[[G_ID:[0-9]+]]",
30+
// CHECK: "name": "z"
31+
// CHECK: "_kind": "var_decl",
32+
// CHECK: "decl_context": "replaced-pointer-[[G_ID]]",
33+
var z = 0
34+
}

0 commit comments

Comments
 (0)