Skip to content

Commit 5fd1cbb

Browse files
committed
[AST] Replace pointers in JSON output with deterministic IDs.
There are a few places in the AST dumper where it prints hexadecimal pointer addresses. This can be useful when dumping nodes in the debugger, but it makes the output nondeterministic, which can cause problems for build systems that cache outputs. In the JSON format only, replace these pointers with the string `"replaced-pointer-XX"`, where `XX` is an increasing integer ID. We keep track of the pointers that have been replaced so that the same pointer always maps to the same ID; this means that those nodes can still be related to each other when reading the AST later, if needed.
1 parent a8cef44 commit 5fd1cbb

File tree

2 files changed

+97
-13
lines changed

2 files changed

+97
-13
lines changed

lib/AST/ASTDumper.cpp

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -680,13 +680,23 @@ static StringRef getDumpString(StringRef s) {
680680
static unsigned getDumpString(unsigned value) {
681681
return value;
682682
}
683-
static size_t getDumpString(size_t value) {
684-
return value;
685-
}
686-
static void *getDumpString(void *value) { return value; }
683+
static size_t getDumpString(size_t value) { return value; }
687684

688685
static StringRef getDumpString(Identifier ident) { return ident.str(); }
689686

687+
// If you are reading this comment because a compiler error directed you
688+
// here, it's probably because you have tried to pass a pointer directly
689+
// into a function like `printField`. Please do not do this -- it makes
690+
// the output nondeterministic. For the default S-expression output this
691+
// is not typically an issue (as it is used mainly for debugging), but
692+
// this is particularly problematic for the JSON format since build
693+
// systems may want to cache those outputs based on the content hash.
694+
//
695+
// Please call `printPointerField` instead. The output format will be
696+
// the same for the default formatter, but the JSON formatter will
697+
// replace those pointers with unique deterministic identifiers.
698+
static void *getDumpString(void *value) = delete;
699+
690700
//===----------------------------------------------------------------------===//
691701
// Decl printing.
692702
//===----------------------------------------------------------------------===//
@@ -813,6 +823,10 @@ namespace {
813823
virtual void printSourceRange(const SourceRange R, const ASTContext *Ctx,
814824
Label label) = 0;
815825

826+
/// Prints the given pointer to an output stream, transforming the pointer
827+
/// if necessary to make it safe for the output format.
828+
virtual void printPointerToStream(void *pointer, llvm::raw_ostream &OS) = 0;
829+
816830
/// Indicates whether the output format is meant to be parsable. Parsable
817831
/// output should use structure rather than stringification to convey
818832
/// detailed information, and generally provides more information than the
@@ -906,6 +920,12 @@ namespace {
906920
}, label, RangeColor);
907921
}
908922

923+
void printPointerToStream(void *pointer, llvm::raw_ostream &OS) override {
924+
// The default S-expression format leaves the pointer as-is, since this
925+
// is useful when dumping AST nodes in the debugger.
926+
OS << pointer;
927+
}
928+
909929
bool isParsable() const override { return false; }
910930
};
911931

@@ -914,8 +934,12 @@ namespace {
914934
llvm::json::OStream OS;
915935
std::vector<bool> InObjectStack;
916936

937+
llvm::DenseMap<void *, int> DeterministicPointerIDs;
938+
int NextPointerID;
939+
917940
public:
918-
JSONWriter(raw_ostream &os, unsigned indent = 0) : OS(os, indent) {}
941+
JSONWriter(raw_ostream &os, unsigned indent = 0)
942+
: OS(os, indent), NextPointerID(1) {}
919943

920944
void printRecArbitrary(std::function<void(Label)> body,
921945
Label label) override {
@@ -1016,6 +1040,21 @@ namespace {
10161040
OS.attributeEnd();
10171041
}
10181042

1043+
void printPointerToStream(void *pointer, llvm::raw_ostream &OS) override {
1044+
// JSON output may be used by build systems that want deterministic
1045+
// output for caching purposes. Generate a unique ID the first time
1046+
// we see a new pointer.
1047+
int pointerID;
1048+
if (auto it = DeterministicPointerIDs.find(pointer);
1049+
it != DeterministicPointerIDs.end()) {
1050+
pointerID = it->second;
1051+
} else {
1052+
pointerID = NextPointerID++;
1053+
DeterministicPointerIDs[pointer] = pointerID;
1054+
}
1055+
OS << "replaced-pointer-" << pointerID;
1056+
}
1057+
10191058
bool isParsable() const override { return true; }
10201059
};
10211060

@@ -1495,6 +1534,18 @@ namespace {
14951534
label, color);
14961535
}
14971536

1537+
/// Print a field that is a bare pointer as a short keyword-style value.
1538+
/// For parsable output formats that need to be deterministic, each
1539+
/// pointer will be replaced by a unique ID. The same pointer will be
1540+
/// mapped to the same ID, so such nodes can still be related to each
1541+
/// other across the tree.
1542+
void printPointerField(void *value, Label label,
1543+
TerminalColor color = FieldLabelColor) {
1544+
printFieldRaw(
1545+
[&](raw_ostream &OS) { Writer.printPointerToStream(value, OS); },
1546+
label, color);
1547+
}
1548+
14981549
/// Print a field with a long value that will be automatically quoted and
14991550
/// escaped, printing the value by passing a closure that takes a
15001551
/// \c raw_ostream.
@@ -1668,8 +1719,8 @@ namespace {
16681719

16691720
template <typename T>
16701721
void printDeclContext(const T *D) {
1671-
printField(static_cast<void *>(D->getDeclContext()),
1672-
Label::always("decl_context"));
1722+
printPointerField(static_cast<void *>(D->getDeclContext()),
1723+
Label::always("decl_context"));
16731724
}
16741725

16751726
/// Prints a field containing the name or the USR (based on parsability of
@@ -2704,8 +2755,8 @@ namespace {
27042755
printHead("pattern_entry", FieldLabelColor, label);
27052756

27062757
if (PBD->getInitContext(idx))
2707-
printField(PBD->getInitContext(idx),
2708-
Label::always("init_context"));
2758+
printPointerField(PBD->getInitContext(idx),
2759+
Label::always("init_context"));
27092760

27102761
printRec(PBD->getPattern(idx), Label::optional("pattern"));
27112762
if (PBD->getOriginalInit(idx)) {
@@ -4158,8 +4209,7 @@ class PrintExpr : public ExprVisitor<PrintExpr, void, Label>,
41584209

41594210
void visitOpaqueValueExpr(OpaqueValueExpr *E, Label label) {
41604211
printCommon(E, "opaque_value_expr", label);
4161-
printNameRaw([&](raw_ostream &OS) { OS << (void*)E; },
4162-
Label::optional("identity"));
4212+
printPointerField(static_cast<void *>(E), Label::optional("identity"));
41634213
printFoot();
41644214
}
41654215

@@ -5163,7 +5213,7 @@ class PrintAttribute : public AttributeVisitor<PrintAttribute, void, Label>,
51635213
void visitCustomAttr(CustomAttr *Attr, Label label) {
51645214
printCommon(Attr, "custom_attr", label);
51655215

5166-
printField(
5216+
printPointerField(
51675217
static_cast<void *>(static_cast<DeclContext *>(Attr->getInitContext())),
51685218
Label::always("init_context"));
51695219

@@ -6323,7 +6373,7 @@ namespace {
63236373
Label label) {
63246374
printCommon(className, label);
63256375

6326-
printField(static_cast<void *>(T), Label::always("address"));
6376+
printPointerField(static_cast<void *>(T), Label::always("address"));
63276377
printFlag(T->requiresClass(), "class");
63286378
if (auto layout = T->getLayoutConstraint()) {
63296379
printFieldRaw([&](raw_ostream &OS) {
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: %target-swift-frontend -target %target-swift-5.9-abi-triple -swift-version 6 -parse-as-library -dump-ast -dump-ast-format json %s -module-name main -o - > %t/main.json
3+
// RUN: %{python} -c 'import json, sys; print(json.dumps(json.load(sys.stdin), indent=4))' < %t/main.json | %FileCheck %s
4+
5+
// CHECK: "_kind": "func_decl",
6+
// CHECK: "decl_context": "replaced-pointer-[[FILE_ID:[0-9]+]]",
7+
// CHECK: "name": "f"
8+
func f() {
9+
// CHECK: "_kind": "pattern_binding_decl",
10+
// CHECK: "decl_context": "replaced-pointer-[[F_ID:[0-9]+]]",
11+
// CHECK: "name": "x"
12+
// CHECK: "_kind": "var_decl",
13+
// CHECK: "decl_context": "replaced-pointer-[[F_ID]]",
14+
var x = 0
15+
16+
// CHECK: "_kind": "pattern_binding_decl",
17+
// CHECK: "decl_context": "replaced-pointer-[[F_ID]]",
18+
// CHECK: "name": "y"
19+
// CHECK: "_kind": "var_decl",
20+
// CHECK: "decl_context": "replaced-pointer-[[F_ID]]",
21+
var y = 0
22+
}
23+
24+
// CHECK: "_kind": "func_decl",
25+
// CHECK: "decl_context": "replaced-pointer-[[FILE_ID]]",
26+
// CHECK: "name": "g"
27+
func g() {
28+
// CHECK: "_kind": "pattern_binding_decl",
29+
// CHECK: "decl_context": "replaced-pointer-[[G_ID:[0-9]+]]",
30+
// CHECK: "name": "z"
31+
// CHECK: "_kind": "var_decl",
32+
// CHECK: "decl_context": "replaced-pointer-[[G_ID]]",
33+
var z = 0
34+
}

0 commit comments

Comments
 (0)