From 9b6d4cb0fdec1bca5a7fe50cefcfa2161aa185b4 Mon Sep 17 00:00:00 2001
From: bulletSpace
Date: Fri, 23 Aug 2024 11:37:59 +0200
Subject: [PATCH 01/27] iterative Tarjan Algorithm implemented

---
 .../PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 249 +++++++
 include/phasar/PhasarLLVM/Utils/Compressor.h | 212 ++++++
 .../phasar/PhasarLLVM/ControlFlow/CallGraph.h | 304 ++++++++
 .../include/phasar/PhasarLLVM/Utils/SCC.cpp | 196 +++++
 .../include/phasar/PhasarLLVM/Utils/SCC.h | 71 ++
 .../phasar/PhasarLLVM/Utils/SCCGeneric.cpp | 196 +++++
 .../phasar/PhasarLLVM/Utils/SCCGeneric.h | 72 ++
 .../PhasarLLVM/Utils/TypeAssignmentGraph.cpp | 698 ++++++++++++++++++
 .../PhasarLLVM/Utils/TypeAssignmentGraph.h | 150 ++++
 .../utils/include/phasar/Utils/Compressor.h | 212 ++++++
 10 files changed, 2360 insertions(+)
 create mode 100644 include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h
 create mode 100644 include/phasar/PhasarLLVM/Utils/Compressor.h
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp
 create mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h
 create mode 100644 phasar/utils/include/phasar/Utils/Compressor.h

diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h
new file mode 100644
index 0000000000..6697347d75
--- /dev/null
+++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h
@@ -0,0 +1,249 @@
+/******************************************************************************
+ * Copyright (c) 2024 Fabian Schiebel.
+ * All rights reserved.
+ * This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ *
+ * Contributors:
+ *     Fabian Schiebel and other
+ *****************************************************************************/
+
+#pragma once
+
+// NOTE(review): The template argument lists and the standard-library includes
+// in this file were missing from the reviewed copy of the patch. They were
+// reconstructed from how the members are used -- confirm the element types and
+// inline sizes against the repository.
+
+// #include "phasar/PhasarLLVM/Utils/Compressor.h"
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+namespace psr {
+class LLVMBasedICFG;
+} // namespace psr
+
+namespace psr::analysis::call_graph {
+// struct TypeAssignmentGraph;
+enum class GraphNodeId : uint32_t;
+
+enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{};
+
+/// Holds the strongly connected components (SCCs) of a given graph
+struct SCCHolder {
+  /// The SCC that each node was assigned to; indexed by GraphNodeId
+  llvm::SmallVector<SCCId, 0> SCCOfNode{};
+  /// The nodes that make up each SCC; indexed by SCCId
+  llvm::SmallVector<llvm::SmallVector<GraphNodeId, 1>, 0> NodesInSCC{};
+  /// The number of SCCs found so far
+  size_t NumSCCs = 0;
+};
+
+/// Holds a graph where the SCCs are compressed to a single node. The resulting
+/// graph is a DAG
+struct SCCCallers {
+  llvm::SmallVector<llvm::SmallDenseSet<SCCId>, 0> ChildrenOfSCC{};
+  llvm::SmallVector<SCCId, 0> SCCRoots{};
+
+  template <typename G>
+  void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph);
+};
+
+/// Holds the topologically sorted SCCs of an SCCCallers graph
+struct SCCOrder {
+  llvm::SmallVector<SCCId, 0> SCCIds;
+};
+
+/// Working state of the recursive SCC computation (computeSCCsRec)
+struct SCCData {
+  /// Discovery time of each node; UINT32_MAX means "not discovered yet"
+  llvm::SmallVector<uint32_t, 128> Disc;
+  /// Smallest discovery time that is known to be reachable from each node
+  llvm::SmallVector<uint32_t, 128> Low;
+  /// Marks the nodes that are currently held in Stack
+  llvm::SmallBitVector OnStack;
+  /// Discovered nodes that have not been assigned to an SCC yet
+  llvm::SmallVector<GraphNodeId> Stack;
+  /// The next discovery time to hand out
+  uint32_t Time = 0;
+  /// Marks the nodes that have already been assigned to an SCC
+  llvm::SmallBitVector Seen;
+
+  explicit SCCData(size_t NumFuns)
+      : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns),
+        Seen(NumFuns) {}
+};
+
+/// Working state of the iterative SCC computation (TarjanIt). Same as SCCData,
+/// plus the explicit call stack that replaces the recursion.
+struct SCCDataIt {
+  llvm::SmallVector<uint32_t, 128> Disc;
+  llvm::SmallVector<uint32_t, 128> Low;
+  llvm::SmallBitVector OnStack;
+  llvm::SmallVector<GraphNodeId> Stack;
+  /// Simulated call frames: (node, index of the next outgoing edge to look at)
+  llvm::SmallVector<std::pair<GraphNodeId, uint32_t>> CallStack;
+  uint32_t Time = 0;
+  llvm::SmallBitVector Seen;
+
+  explicit SCCDataIt(size_t NumFuns)
+      : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns),
+        Seen(NumFuns) {}
+};
+
+/// Lowers InOut to Other, if Other is smaller
+static void setMin(uint32_t &InOut, uint32_t Other) {
+  if (Other < InOut) {
+    InOut = Other;
+  }
+}
+
+/// Pops the SCC with root node Root off Data.Stack and records it in Holder.
+/// Shared by the recursive and the iterative algorithm.
+///
+/// \pre Root is on Data.Stack
+template <typename DataT>
+static void emitSCC(GraphNodeId Root, DataT &Data, SCCHolder &Holder) {
+  auto SCCIdx = SCCId(Holder.NumSCCs++);
+  auto &NodesInSCC = Holder.NodesInSCC.emplace_back();
+
+  assert(!Data.Stack.empty());
+
+  // Everything above Root on the stack belongs to the same SCC as Root
+  GraphNodeId Member = Root;
+  do {
+    Member = Data.Stack.pop_back_val();
+    Holder.SCCOfNode[size_t(Member)] = SCCIdx;
+    Data.OnStack.reset(uint32_t(Member));
+    Data.Seen.set(uint32_t(Member));
+    NodesInSCC.push_back(Member);
+  } while (Member != Root);
+}
+
+/// Recursive variant of Tarjan's SCC algorithm. Explores everything that is
+/// reachable from CurrNode; the recursion depth grows with the length of the
+/// DFS path, see TarjanIt for a variant that does not use the machine stack.
+///
+/// Requires Graph.Adj[N] to be an iterable range of the successors of node N.
+template <typename G>
+static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data,
+                           SCCHolder &Holder) {
+  // See
+  // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components
+
+  auto CurrTime = Data.Time++;
+  Data.Disc[size_t(CurrNode)] = CurrTime;
+  Data.Low[size_t(CurrNode)] = CurrTime;
+  Data.Stack.push_back(CurrNode);
+  Data.OnStack.set(uint32_t(CurrNode));
+
+  for (auto SuccNode : Graph.Adj[size_t(CurrNode)]) {
+    if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) {
+      // Tree-edge: Not seen yet --> recurse
+
+      computeSCCsRec(Graph, SuccNode, Data, Holder);
+      setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]);
+    } else if (Data.OnStack.test(uint32_t(SuccNode))) {
+      // Back-edge --> circle!
+
+      setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]);
+    }
+  }
+
+  if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) {
+    // Found SCC
+    emitSCC(CurrNode, Data, Holder);
+  }
+}
+
+/// Iterative variant of Tarjan's SCC algorithm. It performs the same steps as
+/// computeSCCsRec, but keeps the DFS frames in Data.CallStack instead of on
+/// the machine stack.
+///
+/// The graph is explored from StartNode first and then from every node that is
+/// still undiscovered, so on return every node is assigned to an SCC.
+///
+/// Requirements on G: Graph.Nodes.size() is the number of nodes, and
+/// Graph.getEdges(Node) yields the successors of Node as a sequence that
+/// provides size() and operator[].
+///
+/// \pre Holder.SCCOfNode and the vectors in Data hold at least
+/// Graph.Nodes.size() elements
+template <typename G>
+static void TarjanIt(const G &Graph, GraphNodeId StartNode, SCCDataIt &Data,
+                     SCCHolder &Holder) {
+  const size_t NumNodes = Graph.Nodes.size();
+  if (NumNodes == 0) {
+    return;
+  }
+
+  assert(size_t(StartNode) < NumNodes && "StartNode is not a node of Graph");
+  assert(Holder.SCCOfNode.size() >= NumNodes &&
+         "Holder.SCCOfNode must provide one slot per node");
+
+  auto ExploreFrom = [&](GraphNodeId Root) {
+    if (Data.Disc[size_t(Root)] != UINT32_MAX) {
+      // Already reached from an earlier root
+      return;
+    }
+
+    Data.CallStack.emplace_back(Root, 0);
+    while (!Data.CallStack.empty()) {
+      auto [Node, EdgeIdx] = Data.CallStack.pop_back_val();
+      const auto &Edges = Graph.getEdges(Node);
+
+      if (EdgeIdx == 0) {
+        // Fresh frame: Node is visited for the first time
+        const uint32_t Now = Data.Time++;
+        Data.Disc[size_t(Node)] = Now;
+        Data.Low[size_t(Node)] = Now;
+        Data.Stack.push_back(Node);
+        Data.OnStack.set(uint32_t(Node));
+      } else {
+        // Resumed frame: we return from the "recursive call" on the child
+        // that was reached via the edge in front of EdgeIdx
+        const GraphNodeId Child = Edges[EdgeIdx - 1];
+        setMin(Data.Low[size_t(Node)], Data.Low[size_t(Child)]);
+      }
+
+      // Look for the next undiscovered successor
+      bool Descended = false;
+      const auto NumEdges = Edges.size();
+      for (; EdgeIdx < NumEdges; ++EdgeIdx) {
+        const GraphNodeId Succ = Edges[EdgeIdx];
+        if (Data.Disc[size_t(Succ)] == UINT32_MAX) {
+          // Tree-edge: suspend Node (resume behind this edge) and descend
+          Data.CallStack.emplace_back(Node, EdgeIdx + 1);
+          Data.CallStack.emplace_back(Succ, 0);
+          Descended = true;
+          break;
+        }
+        if (Data.OnStack.test(uint32_t(Succ))) {
+          // Back-edge --> circle!
+          setMin(Data.Low[size_t(Node)], Data.Disc[size_t(Succ)]);
+        }
+      }
+      if (Descended) {
+        continue;
+      }
+
+      // All edges of Node are processed. Node is the root of an SCC, iff it
+      // cannot reach anything that was discovered earlier
+      if (Data.Low[size_t(Node)] == Data.Disc[size_t(Node)]) {
+        emitSCC(Node, Data, Holder);
+      }
+    }
+  };
+
+  ExploreFrom(StartNode);
+  for (uint32_t Vertex = 0; Vertex != NumNodes; ++Vertex) {
+    ExploreFrom(GraphNodeId(Vertex));
+  }
+}
+
+/// Computes the SCCs of Graph with the recursive algorithm (computeSCCsRec).
+///
+/// Requires Graph.Nodes.size() to be the number of nodes and Graph.Adj[N] to
+/// be an iterable range of the successors of node N.
+template <typename G> [[nodiscard]] SCCHolder computeSCCs(const G &Graph) {
+  SCCHolder Ret{};
+
+  auto NumNodes = Graph.Nodes.size();
+  Ret.SCCOfNode.resize(NumNodes);
+
+  if (!NumNodes) {
+    return Ret;
+  }
+
+  SCCData Data(NumNodes);
+  for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) {
+    // Nodes that were reached from an earlier start node already have an SCC
+    if (!Data.Seen.test(FunId)) {
+      computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret);
+    }
+  }
+
+  return Ret;
+}
+
+template <typename G>
+[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers
+computeSCCCallers(const G &Graph, const SCCHolder &SCCs);
+
+[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder
+computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers);
+} // namespace psr::analysis::call_graph
+
+namespace llvm {
+/// Allows SCCId to be used as key of llvm::DenseMap and llvm::DenseSet
+template <> struct DenseMapInfo<psr::analysis::call_graph::SCCId> {
+  using SCCId = psr::analysis::call_graph::SCCId;
+
+  static inline SCCId getEmptyKey() noexcept { return SCCId(-1); }
+  static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); }
+  static inline auto getHashValue(SCCId Id) noexcept {
+    return llvm::hash_value(uint32_t(Id));
+  }
+  static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; }
+};
+} // namespace llvm
diff --git a/include/phasar/PhasarLLVM/Utils/Compressor.h b/include/phasar/PhasarLLVM/Utils/Compressor.h
new file mode 100644
index 0000000000..4fbba84490
--- /dev/null
+++ b/include/phasar/PhasarLLVM/Utils/Compressor.h
@@ -0,0 +1,212 @@
+/******************************************************************************
+ * Copyright (c) 2024 Fabian Schiebel.
+ * All rights reserved. This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ *
+ * Contributors:
+ *     Fabian Schiebel and other
+ *****************************************************************************/
+
+#pragma once
+
+// NOTE(review): The template parameter/argument lists and the
+// standard-library includes in this file were missing from the reviewed copy
+// of the patch. They were reconstructed from how the names are used -- confirm
+// them (in particular the default for Id) against the repository.
+
+#include "phasar/Utils/ByRef.h"
+#include "phasar/Utils/TypeTraits.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include <cassert>
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <optional>
+#include <type_traits>
+#include <utility>
+
+namespace psr {
+/// Assigns a sequential Id to every distinct element that gets inserted and
+/// maps between elements and Ids in both directions.
+template <typename T, typename Id = uint32_t, typename Enable = void>
+class Compressor;
+
+/// Compressor for element types that are stored and passed by value
+template <typename T, typename Id>
+class Compressor<T, Id, std::enable_if_t<CanEfficientlyPassByValue<T>>> {
+public:
+  void reserve(size_t Capacity) {
+    assert(Capacity <= UINT32_MAX);
+    ToInt.reserve(Capacity);
+    FromInt.reserve(Capacity);
+  }
+
+  /// Returns the Id of Elem; inserts Elem first, if it is not known yet
+  Id getOrInsert(T Elem) { return insert(std::move(Elem)).first; }
+
+  /// Same as getOrInsert(), but additionally tells whether Elem was newly
+  /// inserted (true) or already known (false)
+  std::pair<Id, bool> insert(T Elem) {
+    // The Id of a new element is its position in insertion order
+    auto [It, Inserted] = ToInt.try_emplace(Elem, Id(ToInt.size()));
+    if (Inserted) {
+      FromInt.push_back(Elem);
+    }
+    return {It->second, Inserted};
+  }
+
+  /// Returns the Id of Elem, or std::nullopt if Elem was never inserted
+  [[nodiscard]] std::optional<Id> getOrNull(T Elem) const {
+    if (auto It = ToInt.find(Elem); It != ToInt.end()) {
+      return It->second;
+    }
+    return std::nullopt;
+  }
+
+  /// Returns the Id of Elem.
+  /// \pre Elem has been inserted before
+  [[nodiscard]] Id get(T Elem) const {
+    auto It = ToInt.find(Elem);
+    assert(It != ToInt.end());
+    return It->second;
+  }
+
+  /// Returns the element that was assigned the Id Idx
+  [[nodiscard]] T operator[](Id Idx) const noexcept {
+    assert(size_t(Idx) < FromInt.size());
+    return FromInt[size_t(Idx)];
+  }
+
+  [[nodiscard]] size_t size() const noexcept { return FromInt.size(); }
+  [[nodiscard]] size_t capacity() const noexcept {
+    return FromInt.capacity() +
+           ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type);
+  }
+
+  /// Iterates the elements in insertion order
+  auto begin() const noexcept { return FromInt.begin(); }
+  auto end() const noexcept { return FromInt.end(); }
+
+private:
+  llvm::DenseMap<T, Id> ToInt;
+  llvm::SmallVector<T, 0> FromInt;
+};
+
+/// Compressor for element types that are passed by reference. The elements are
+/// owned by a std::deque; the lookup table only holds pointers to them.
+template <typename T, typename Id>
+class Compressor<T, Id, std::enable_if_t<!CanEfficientlyPassByValue<T>>> {
+public:
+  void reserve(size_t Capacity) {
+    assert(Capacity <= UINT32_MAX);
+    ToInt.reserve(Capacity);
+  }
+
+  /// Returns the Id of Elem; inserts a copy of Elem first, if it is not known
+  /// yet
+  Id getOrInsert(const T &Elem) { return insertImpl(Elem).first; }
+
+  /// Same as getOrInsert(const T &), but moves Elem into the Compressor, if it
+  /// needs to be inserted
+  Id getOrInsert(T &&Elem) { return insertImpl(std::move(Elem)).first; }
+
+  /// Same as getOrInsert(), but additionally tells whether Elem was newly
+  /// inserted (true) or already known (false)
+  std::pair<Id, bool> insert(const T &Elem) { return insertImpl(Elem); }
+  std::pair<Id, bool> insert(T &&Elem) { return insertImpl(std::move(Elem)); }
+
+  /// Returns the Id of Elem, or std::nullopt if Elem was never inserted
+  [[nodiscard]] std::optional<Id> getOrNull(const T &Elem) const {
+    if (auto It = ToInt.find(&Elem); It != ToInt.end()) {
+      return It->second;
+    }
+    return std::nullopt;
+  }
+
+  /// Returns the Id of Elem.
+  /// \pre Elem has been inserted before
+  [[nodiscard]] Id get(const T &Elem) const {
+    auto It = ToInt.find(&Elem);
+    assert(It != ToInt.end());
+    return It->second;
+  }
+
+  /// Returns the element that was assigned the Id Idx
+  [[nodiscard]] const T &operator[](Id Idx) const noexcept {
+    assert(size_t(Idx) < FromInt.size());
+    return FromInt[size_t(Idx)];
+  }
+
+  [[nodiscard]] size_t size() const noexcept { return FromInt.size(); }
+  [[nodiscard]] size_t capacity() const noexcept {
+    return FromInt.size() +
+           ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type);
+  }
+
+  /// Iterates the elements in insertion order
+  auto begin() const noexcept { return FromInt.begin(); }
+  auto end() const noexcept { return FromInt.end(); }
+
+private:
+  /// Common implementation of all insert()/getOrInsert() overloads; copies or
+  /// moves Elem depending on its value category
+  template <typename U> std::pair<Id, bool> insertImpl(U &&Elem) {
+    if (auto It = ToInt.find(&Elem); It != ToInt.end()) {
+      return {It->second, false};
+    }
+    auto Ret = Id(FromInt.size());
+    // The key must point to the copy owned by FromInt, not to the argument
+    auto *Ins = &FromInt.emplace_back(std::forward<U>(Elem));
+    ToInt[Ins] = Ret;
+    return {Ret, true};
+  }
+
+  /// Hashes and compares the pointed-to elements instead of the pointers
+  struct DSI : llvm::DenseMapInfo<const T *> {
+    static auto getHashValue(const T *Elem) noexcept {
+      assert(Elem != nullptr);
+      if constexpr (has_llvm_dense_map_info<T>) {
+        return llvm::DenseMapInfo<T>::getHashValue(*Elem);
+      } else {
+        return std::hash<T>{}(*Elem);
+      }
+    }
+    static auto isEqual(const T *LHS, const T *RHS) noexcept {
+      if (LHS == RHS) {
+        return true;
+      }
+      // The sentinel keys must never be dereferenced
+      if (LHS == DSI::getEmptyKey() || LHS == DSI::getTombstoneKey() ||
+          RHS == DSI::getEmptyKey() || RHS == DSI::getTombstoneKey()) {
+        return false;
+      }
+      if constexpr (has_llvm_dense_map_info<T>) {
+        return llvm::DenseMapInfo<T>::isEqual(*LHS, *RHS);
+      } else {
+        return *LHS == *RHS;
+      }
+    }
+  };
+
+  std::deque<T> FromInt;
+  llvm::DenseMap<const T *, Id, DSI> ToInt;
+};
+
+/// Drop-in replacement for a Compressor that does not compress anything:
+/// getOrInsert() and operator[] hand back their argument unchanged
+struct NoneCompressor final {
+  constexpr NoneCompressor() noexcept = default;
+
+  template <typename T,
+            typename = std::enable_if_t<!std::is_same_v<NoneCompressor, T>>>
+  constexpr NoneCompressor(const T & /*unused*/) noexcept {}
+
+  template <typename T>
+  [[nodiscard]] decltype(auto) getOrInsert(T &&Val) const noexcept {
+    return std::forward<T>(Val);
+  }
+  template <typename T>
+  [[nodiscard]] decltype(auto) operator[](T &&Val) const noexcept {
+    return std::forward<T>(Val);
+  }
+  void reserve(size_t /*unused*/) const noexcept {}
+
+  [[nodiscard]] size_t size() const noexcept { return 0; }
+  [[nodiscard]] size_t capacity() const noexcept { return 0; }
+};
+
+} // namespace psr
diff --git a/phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h b/phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h
new file mode 100644
index 0000000000..590b16964e
--- /dev/null
+++ b/phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h
@@ -0,0 +1,304 @@
+/******************************************************************************
+ * Copyright (c) 2023 Fabian Schiebel.
+ * All rights reserved. This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ *
+ * Contributors:
+ *     Fabian Schiebel and others
+ *****************************************************************************/
+
+#ifndef PHASAR_CONTROLFLOW_CALLGRAPH_H
+#define PHASAR_CONTROLFLOW_CALLGRAPH_H
+
+// NOTE(review): The template parameter/argument lists and the
+// standard-library includes in this file were missing from the reviewed copy
+// of the patch. They were reconstructed from how the names are used -- confirm
+// them against the repository.
+
+#include "phasar/ControlFlow/CallGraphBase.h"
+#include "phasar/Utils/ByRef.h"
+#include "phasar/Utils/Logger.h"
+#include "phasar/Utils/StableVector.h"
+#include "phasar/Utils/Utilities.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+
+#include "nlohmann/json.hpp"
+
+#include <functional>
+#include <utility>
+#include <vector>
+
+namespace psr {
+template <typename N, typename F> class CallGraphBuilder;
+template <typename N, typename F> class CallGraph;
+
+template <typename N, typename F> struct CGTraits<CallGraph<N, F>> {
+  using n_t = N;
+  using f_t = F;
+};
+
+/// An explicit graph-representation of a call-graph. Only represents the data,
+/// not the call-graph analysis that creates it.
+///
+/// This type is immutable. To incrementally build it from your call-graph
+/// analysis, use the CallGraphBuilder
+template <typename N, typename F>
+class CallGraph : public CallGraphBase<CallGraph<N, F>> {
+  using base_t = CallGraphBase<CallGraph<N, F>>;
+  friend base_t;
+  friend class CallGraphBuilder<N, F>;
+
+public:
+  using typename base_t::f_t;
+  using typename base_t::n_t;
+  /// The call-sites that may call a function
+  using FunctionVertexTy = llvm::SmallVector<n_t>;
+  /// The functions that may be called at a call-site
+  using InstructionVertexTy = llvm::SmallVector<f_t>;
+
+  /// Creates a new, empty call-graph
+  CallGraph() noexcept = default;
+
+  /// Deserializes a previously computed call-graph, i.e., the inverse
+  /// operation of getAsJson().
+  ///
+  /// Functions and call-sites, for which GetFunctionFromName or
+  /// GetInstructionFromId return a null value, are skipped with a warning.
+  /// Returns an empty call-graph, if PrecomputedCG is not a json object.
+  template <typename FunctionGetter, typename InstructionGetter>
+  [[nodiscard]] static CallGraph
+  deserialize(const nlohmann::json &PrecomputedCG,
+              FunctionGetter GetFunctionFromName,
+              InstructionGetter GetInstructionFromId);
+
+  /// A range of all functions that are vertices in the call-graph. The number
+  /// of vertex functions can be retrieved by getNumVertexFunctions().
+  [[nodiscard]] auto getAllVertexFunctions() const noexcept {
+    return llvm::make_first_range(CallersOf);
+  }
+
+  /// A range of all call-sites that are vertices in the call-graph. The number
+  /// of vertex-callsites can be retrieved by getNumVertexCallSites().
+  [[nodiscard]] auto getAllVertexCallSites() const noexcept {
+    return llvm::make_first_range(CalleesAt);
+  }
+
+  [[nodiscard]] size_t getNumVertexFunctions() const noexcept {
+    return CallersOf.size();
+  }
+  [[nodiscard]] size_t getNumVertexCallSites() const noexcept {
+    return CalleesAt.size();
+  }
+
+  /// The number of functions within this call-graph
+  [[nodiscard]] size_t size() const noexcept { return getNumVertexFunctions(); }
+
+  [[nodiscard]] bool empty() const noexcept { return CallersOf.empty(); }
+
+  /// Creates a JSON representation of this call-graph suitable for persistent
+  /// storage: an object that maps the id of each function to the list of ids
+  /// of the call-sites that may call it.
+  /// Use deserialize() to re-create the call-graph from it
+  template <typename FunctionIdGetter, typename InstIdGetter>
+  [[nodiscard]] nlohmann::json getAsJson(FunctionIdGetter GetFunctionId,
+                                         InstIdGetter GetInstructionId) const {
+    nlohmann::json J;
+
+    for (const auto &[Fun, Callers] : CallersOf) {
+      auto &JCallers = J[std::invoke(GetFunctionId, Fun)];
+
+      for (const auto &CS : *Callers) {
+        JCallers.push_back(std::invoke(GetInstructionId, CS));
+      }
+    }
+
+    return J;
+  }
+
+  /// Prints this call-graph in the graphviz dot format. Functions are the
+  /// nodes; every (call-site, callee) pair becomes an edge from the function
+  /// that contains the call-site to the callee, labeled with the call-site.
+  template <typename FunctionLabelGetter, typename InstParentGetter,
+            typename InstLabelGetter>
+  void printAsDot(llvm::raw_ostream &OS, FunctionLabelGetter GetFunctionLabel,
+                  InstParentGetter GetFunctionFromInst,
+                  InstLabelGetter GetInstLabel) const {
+    OS << "digraph CallGraph{\n";
+    scope_exit CloseBrace = [&OS] { OS << "}\n"; };
+
+    llvm::DenseMap<f_t, size_t> Fun2Id;
+    Fun2Id.reserve(CallersOf.size());
+
+    size_t CurrId = 0;
+    for (const auto &Fun : getAllVertexFunctions()) {
+      OS << CurrId << "[label=\"";
+      OS.write_escaped(std::invoke(GetFunctionLabel, Fun)) << "\"];\n";
+      Fun2Id[Fun] = CurrId++;
+    }
+
+    for (const auto &[CS, Callees] : CalleesAt) {
+      const auto &Fun = std::invoke(GetFunctionFromInst, CS);
+
+      for (const auto &Succ : *Callees) {
+        OS << Fun2Id.lookup(Fun) << "->" << Fun2Id.lookup(Succ) << "[label=\"";
+        OS.write_escaped(std::invoke(GetInstLabel, CS)) << "\"];\n";
+      }
+    }
+  }
+
+private:
+  [[nodiscard]] llvm::ArrayRef<f_t>
+  getCalleesOfCallAtImpl(ByConstRef<n_t> Inst) const noexcept {
+    if (const auto *CalleesPtr = CalleesAt.lookup(Inst)) {
+      return *CalleesPtr;
+    }
+    return {};
+  }
+
+  [[nodiscard]] llvm::ArrayRef<n_t>
+  getCallersOfImpl(ByConstRef<f_t> Fun) const noexcept {
+    if (const auto *CallersPtr = CallersOf.lookup(Fun)) {
+      return *CallersPtr;
+    }
+    return {};
+  }
+
+  // ---
+
+  // The maps below hold pointers into these two containers
+  StableVector<InstructionVertexTy> InstVertexOwner;
+  std::vector<FunctionVertexTy> FunVertexOwner;
+
+  llvm::DenseMap<n_t, InstructionVertexTy *> CalleesAt{};
+  llvm::DenseMap<f_t, FunctionVertexTy *> CallersOf{};
+};
+
+/// A mutable wrapper over a CallGraph. Use this to build a call-graph from
+/// within your call-graph analysis.
+template <typename N, typename F> class CallGraphBuilder {
+public:
+  using n_t = typename CallGraph<N, F>::n_t;
+  using f_t = typename CallGraph<N, F>::f_t;
+  using FunctionVertexTy = typename CallGraph<N, F>::FunctionVertexTy;
+  using InstructionVertexTy = typename CallGraph<N, F>::InstructionVertexTy;
+
+  /// Reserves space for MaxNumFunctions function vertices. addFunctionVertex()
+  /// asserts that this number is not exceeded.
+  void reserve(size_t MaxNumFunctions) {
+    CG.FunVertexOwner.reserve(MaxNumFunctions);
+    CG.CalleesAt.reserve(MaxNumFunctions);
+    CG.CallersOf.reserve(MaxNumFunctions);
+  }
+
+  /// Registers a new function in the call-graph. Returns a list of all
+  /// call-sites that are known so far to potentially call this function.
+  /// Do not manually add elements to this vector -- use addCallEdge instead.
+  [[nodiscard]] FunctionVertexTy *addFunctionVertex(f_t Fun) {
+    auto [It, Inserted] = CG.CallersOf.try_emplace(std::move(Fun), nullptr);
+    if (Inserted) {
+      // FunVertexOwner is a std::vector and CallersOf stores pointers to its
+      // elements, so it must never grow beyond the reserved capacity
+      auto Cap = CG.FunVertexOwner.capacity();
+      assert(CG.FunVertexOwner.size() < Cap &&
+             "Trying to add more than MaxNumFunctions Function Vertices");
+      It->second = &CG.FunVertexOwner.emplace_back();
+    }
+    return It->second;
+  }
+
+  /// Registers a new call-site in the call-graph. Returns a list of all
+  /// callee functions that are known so far to potentially be called by this
+  /// call-site.
+  /// Do not manually add elements to this vector -- use addCallEdge instead.
+  [[nodiscard]] InstructionVertexTy *addInstructionVertex(n_t Inst) {
+    auto [It, Inserted] = CG.CalleesAt.try_emplace(std::move(Inst), nullptr);
+    if (Inserted) {
+      It->second = &CG.InstVertexOwner.emplace_back();
+    }
+    return It->second;
+  }
+
+  /// Tries to lookup the InstructionVertex for the given call-site. Returns
+  /// nullptr on failure.
+  [[nodiscard]] InstructionVertexTy *
+  getInstVertexOrNull(ByConstRef<n_t> Inst) const noexcept {
+    return CG.CalleesAt.lookup(Inst);
+  }
+
+  /// Adds a new directional edge to the call-graph indicating that CS may call
+  /// Callee
+  void addCallEdge(n_t CS, f_t Callee) {
+    auto *IVtx = addInstructionVertex(CS);
+    auto *FVtx = addFunctionVertex(Callee);
+    addCallEdge(std::move(CS), IVtx, std::move(Callee), FVtx);
+  }
+
+  /// Same as addCallEdge(n_t, f_t), but uses an already known
+  /// InstructionVertexTy to save a lookup
+  void addCallEdge(n_t CS, InstructionVertexTy *Callees, f_t Callee) {
+    auto *Callers = addFunctionVertex(Callee);
+    addCallEdge(std::move(CS), Callees, std::move(Callee), Callers);
+  }
+
+  /// Same as addCallEdge(n_t, f_t), but uses an already known
+  /// FunctionVertexTy to save a lookup
+  void addCallEdge(n_t CS, f_t Callee, FunctionVertexTy *Callers) {
+    auto *Callees = addInstructionVertex(CS);
+    addCallEdge(std::move(CS), Callees, std::move(Callee), Callers);
+  }
+
+  /// Moves the completely built call-graph out of this builder for further
+  /// use. Do not use the builder afterwards.
+  [[nodiscard]] CallGraph<N, F> consumeCallGraph() noexcept {
+    return std::move(CG);
+  }
+
+  /// Returns a view on the current (partial) call-graph that has already been
+  /// constructed
+  [[nodiscard]] const CallGraph<N, F> &viewCallGraph() const noexcept {
+    return CG;
+  }
+
+private:
+  void addCallEdge(n_t CS, InstructionVertexTy *Callees, f_t Callee,
+                   FunctionVertexTy *Callers) {
+    Callees->push_back(std::move(Callee));
+    Callers->push_back(std::move(CS));
+  }
+
+  CallGraph<N, F> CG{};
+};
+
+template <typename N, typename F>
+template <typename FunctionGetter, typename InstructionGetter>
+[[nodiscard]] CallGraph<N, F>
+CallGraph<N, F>::deserialize(const nlohmann::json &PrecomputedCG,
+                             FunctionGetter GetFunctionFromName,
+                             InstructionGetter GetInstructionFromId) {
+  if (!PrecomputedCG.is_object()) {
+    PHASAR_LOG_LEVEL_CAT(ERROR, "CallGraph", "Invalid Json. Expected object");
+    return {};
+  }
+
+  CallGraphBuilder<N, F> CGBuilder;
+  // Each json entry describes one function, so this is an upper bound for the
+  // number of function vertices
+  CGBuilder.reserve(PrecomputedCG.size());
+
+  for (const auto &[FunName, CallerIDs] : PrecomputedCG.items()) {
+    const auto &Fun = std::invoke(GetFunctionFromName, FunName);
+    if (!Fun) {
+      PHASAR_LOG_LEVEL_CAT(WARNING, "CallGraph",
+                           "Invalid function name: " << FunName);
+      continue;
+    }
+
+    auto *CEdges = CGBuilder.addFunctionVertex(Fun);
+    CEdges->reserve(CallerIDs.size());
+
+    for (const auto &JId : CallerIDs) {
+      auto Id = JId.get<size_t>();
+      const auto &CS = std::invoke(GetInstructionFromId, Id);
+      if (!CS) {
+        PHASAR_LOG_LEVEL_CAT(WARNING, "CallGraph",
+                             "Invalid CAll-Instruction Id: " << Id);
+        // Do not add a null call-site to the graph
+        continue;
+      }
+
+      CGBuilder.addCallEdge(CS, Fun);
+    }
+  }
+  return CGBuilder.consumeCallGraph();
+}
+} // namespace psr
+
+namespace llvm {
+class Function;
+class Instruction;
+} // namespace llvm
+
+extern template class psr::CallGraph<const llvm::Instruction *,
+                                     const llvm::Function *>;
+extern template class psr::CallGraphBuilder<const llvm::Instruction *,
+                                            const llvm::Function *>;
+
+#endif // PHASAR_CONTROLFLOW_CALLGRAPH_H
diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp
new file mode 100644
index 0000000000..a210b91f35
--- /dev/null
+++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp
@@ -0,0 +1,196 @@
+/******************************************************************************
+ * Copyright (c) 2024 Fabian Schiebel.
+ * All rights reserved. This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ *
+ * Contributors:
+ *     Fabian Schiebel and other
+ *****************************************************************************/
+
+#include "SCC.h"
+
+// NOTE(review): The template argument lists and the standard-library includes
+// in this file were missing from the reviewed copy of the patch. They were
+// reconstructed from how the names are used -- confirm them against the
+// repository.
+
+#include "llvm/ADT/SmallBitVector.h"
+
+#include "../../../../../utils/include/phasar/Utils/Compressor.h"
+#include "TypeAssignmentGraph.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
+using namespace psr;
+
+using SCCId = analysis::call_graph::SCCId;
+
+namespace {
+/// Working state of computeSCCsRec. Kept local to this file, because
+/// SCCGeneric.cpp defines a type with the same name.
+struct SCCData {
+  /// Discovery time of each node; UINT32_MAX means "not discovered yet"
+  llvm::SmallVector<uint32_t, 128> Disc;
+  /// Smallest discovery time that is known to be reachable from each node
+  llvm::SmallVector<uint32_t, 128> Low;
+  /// Marks the nodes that are currently held in Stack
+  llvm::SmallBitVector OnStack;
+  /// Discovered nodes that have not been assigned to an SCC yet
+  llvm::SmallVector<analysis::call_graph::TAGNodeId> Stack;
+  /// The next discovery time to hand out
+  uint32_t Time = 0;
+  /// Marks the nodes that have already been assigned to an SCC
+  llvm::SmallBitVector Seen;
+
+  explicit SCCData(size_t NumFuns)
+      : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns),
+        Seen(NumFuns) {}
+};
+} // namespace
+
+/// Lowers InOut to Other, if Other is smaller
+static void setMin(uint32_t &InOut, uint32_t Other) {
+  if (Other < InOut) {
+    InOut = Other;
+  }
+}
+
+/// Tarjan's SCC algorithm; explores everything that is reachable from
+/// CurrNode and records the SCCs it completes in Holder.
+// TODO: Non-recursive version
+static void computeSCCsRec(const analysis::call_graph::TypeAssignmentGraph &TAG,
+                           analysis::call_graph::TAGNodeId CurrNode,
+                           SCCData &Data,
+                           psr::analysis::call_graph::SCCHolder &Holder) {
+  // See
+  // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components
+
+  auto CurrTime = Data.Time++;
+  Data.Disc[size_t(CurrNode)] = CurrTime;
+  Data.Low[size_t(CurrNode)] = CurrTime;
+  Data.Stack.push_back(CurrNode);
+  Data.OnStack.set(uint32_t(CurrNode));
+
+  for (auto SuccNode : TAG.Adj[size_t(CurrNode)]) {
+    if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) {
+      // Tree-edge: Not seen yet --> recurse
+
+      computeSCCsRec(TAG, SuccNode, Data, Holder);
+      setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]);
+    } else if (Data.OnStack.test(uint32_t(SuccNode))) {
+      // Back-edge --> circle!
+
+      setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]);
+    }
+  }
+
+  if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) {
+    // Found SCC: everything on the stack down to CurrNode belongs to it
+
+    auto SCCIdx = SCCId(Holder.NumSCCs++);
+    auto &NodesInSCC = Holder.NodesInSCC.emplace_back();
+
+    assert(!Data.Stack.empty());
+
+    auto Member = CurrNode;
+    do {
+      Member = Data.Stack.pop_back_val();
+      Holder.SCCOfNode[size_t(Member)] = SCCIdx;
+      Data.OnStack.reset(uint32_t(Member));
+      Data.Seen.set(uint32_t(Member));
+      NodesInSCC.push_back(Member);
+    } while (Member != CurrNode);
+  }
+}
+
+auto analysis::call_graph::computeSCCs(const TypeAssignmentGraph &TAG)
+    -> SCCHolder {
+  SCCHolder Ret{};
+
+  auto NumNodes = TAG.Nodes.size();
+  Ret.SCCOfNode.resize(NumNodes);
+
+  if (!NumNodes) {
+    return Ret;
+  }
+
+  SCCData Data(NumNodes);
+  for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) {
+    // Nodes that were reached from an earlier start node already have an SCC
+    if (!Data.Seen.test(FunId)) {
+      computeSCCsRec(TAG, TAGNodeId(FunId), Data, Ret);
+    }
+  }
+
+  return Ret;
+}
+
+auto analysis::call_graph::computeSCCCallers(const TypeAssignmentGraph &TAG,
+                                             const SCCHolder &SCCs)
+    -> SCCCallers {
+  SCCCallers Ret;
+  Ret.ChildrenOfSCC.resize(SCCs.NumSCCs);
+
+  // Every SCC is a root, until an edge from another SCC into it is found
+  llvm::SmallBitVector Roots(SCCs.NumSCCs, true);
+
+  size_t NodeId = 0;
+  for (const auto &SuccNodes : TAG.Adj) {
+    auto SrcSCC = SCCs.SCCOfNode[NodeId];
+
+    for (auto SuccNode : SuccNodes) {
+      auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)];
+      // Edges within an SCC do not show up in the compressed graph
+      if (DestSCC != SrcSCC) {
+        Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC);
+        Roots.reset(uint32_t(DestSCC));
+      }
+    }
+
+    ++NodeId;
+  }
+
+  Ret.SCCRoots.reserve(Roots.count());
+  for (auto Rt : Roots.set_bits()) {
+    Ret.SCCRoots.push_back(SCCId(Rt));
+  }
+
+  return Ret;
+}
+
+void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS,
+                                             const SCCHolder &SCCs,
+                                             const TypeAssignmentGraph &TAG) {
+  OS << "digraph SCCTAG {\n";
+  psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; };
+
+  // One dot-node per SCC, labeled with all TAG nodes it contains
+  for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) {
+    OS << "  " << Ctr << "[label=\"";
+    for (auto TNId : SCCs.NodesInSCC[Ctr]) {
+      auto TN = TAG.Nodes[TNId];
+      printNode(OS, TN);
+      OS << "\\n";
+    }
+    OS << "\"];\n";
+  }
+
+  OS << '\n';
+
+  size_t Ctr = 0;
+  for (const auto &Targets : ChildrenOfSCC) {
+    for (auto Tgt : Targets) {
+      OS << "  " << Ctr << "->" << uint32_t(Tgt) << ";\n";
+    }
+    ++Ctr;
+  }
+}
+
+auto analysis::call_graph::computeSCCOrder(const SCCHolder &SCCs,
+                                           const SCCCallers &Callers)
+    -> SCCOrder {
+  SCCOrder Ret;
+  Ret.SCCIds.reserve(SCCs.NumSCCs);
+
+  llvm::SmallBitVector Seen;
+  Seen.resize(SCCs.NumSCCs);
+
+  // Post-order DFS over the compressed graph
+  auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void {
+    Seen.set(uint32_t(CurrSCC));
+    for (auto Child : Callers.ChildrenOfSCC[size_t(CurrSCC)]) {
+      if (!Seen.test(uint32_t(Child))) {
+        Dfs(Dfs, Child);
+      }
+    }
+    Ret.SCCIds.push_back(CurrSCC);
+  };
+
+  for (auto Root : Callers.SCCRoots) {
+    if (!Seen.test(uint32_t(Root))) {
+      Dfs(Dfs, Root);
+    }
+  }
+
+  // Reversed post-order: every SCC comes before its children
+  std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end());
+
+  return Ret;
+}
diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h
new file mode 100644
index 0000000000..3643011cb0
--- /dev/null
+++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * Copyright (c) 2024 Fabian Schiebel.
+ * All rights reserved. This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ *
+ * Contributors:
+ *     Fabian Schiebel and other
+ *****************************************************************************/
+
+#pragma once
+
+// NOTE(review): The template argument lists in this file were missing from the
+// reviewed copy of the patch. They were reconstructed from how the members are
+// used in SCC.cpp -- confirm the element types and inline sizes against the
+// repository.
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "../../../../../utils/include/phasar/Utils/Compressor.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace psr {
+class LLVMBasedICFG;
+} // namespace psr
+
+namespace psr::analysis::call_graph {
+struct TypeAssignmentGraph;
+enum class TAGNodeId : uint32_t;
+
+enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{};
+
+/// Holds the strongly connected components (SCCs) of a TypeAssignmentGraph
+struct SCCHolder {
+  /// The SCC that each node was assigned to; indexed by TAGNodeId
+  llvm::SmallVector<SCCId, 0> SCCOfNode{};
+  /// The nodes that make up each SCC; indexed by SCCId
+  llvm::SmallVector<llvm::SmallVector<TAGNodeId, 1>, 0> NodesInSCC{};
+  /// The number of SCCs
+  size_t NumSCCs = 0;
+};
+
+/// Holds the graph that results from compressing each SCC to a single node
+struct SCCCallers {
+  /// The SCCs that are targeted by an edge from each SCC; indexed by SCCId
+  llvm::SmallVector<llvm::SmallDenseSet<SCCId>, 0> ChildrenOfSCC{};
+  /// The SCCs that are not targeted by an edge from any other SCC
+  llvm::SmallVector<SCCId, 0> SCCRoots{};
+
+  /// Prints the compressed graph in the graphviz dot format
+  void print(llvm::raw_ostream &OS, const SCCHolder &SCCs,
+             const TypeAssignmentGraph &TAG);
+};
+
+/// Holds the SCCs in the order computed by computeSCCOrder()
+struct SCCOrder {
+  llvm::SmallVector<SCCId, 0> SCCIds;
+};
+
+/// Computes the SCCs of TAG
+[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCHolder
+computeSCCs(const TypeAssignmentGraph &TAG);
+
+/// Computes the edges between the SCCs of TAG and the root SCCs
+[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers
+computeSCCCallers(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs);
+
+/// Orders the SCCs that are reachable from the root SCCs, such that every SCC
+/// comes before the SCCs in its ChildrenOfSCC set
+[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder
+computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers);
+} // namespace psr::analysis::call_graph
+
+namespace llvm {
+/// Allows SCCId to be used as key of llvm::DenseMap and llvm::DenseSet
+template <> struct DenseMapInfo<psr::analysis::call_graph::SCCId> {
+  using SCCId = psr::analysis::call_graph::SCCId;
+
+  static inline SCCId getEmptyKey() noexcept { return SCCId(-1); }
+  static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); }
+  static inline auto getHashValue(SCCId Id) noexcept {
+    return llvm::hash_value(uint32_t(Id));
+  }
+  static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; }
+};
+} // namespace llvm
diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp
b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp
new file mode 100644
index 0000000000..099592d172
--- /dev/null
+++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp
@@ -0,0 +1,196 @@
+/******************************************************************************
+ * Copyright (c) 2024 Fabian Schiebel.
+ * All rights reserved. This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ *
+ * Contributors:
+ *     Fabian Schiebel and other
+ *****************************************************************************/
+
+#include "SCCGeneric.h"
+
+// NOTE(review): The template argument lists and the standard-library includes
+// in this file were missing from the reviewed copy of the patch. They were
+// reconstructed from how the names are used -- confirm them against the
+// repository.
+
+#include "llvm/ADT/SmallBitVector.h"
+
+#include "../../../../../utils/include/phasar/Utils/Compressor.h"
+#include "TypeAssignmentGraph.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
+using namespace psr;
+
+using SCCId = analysis::call_graph::SCCId;
+
+namespace {
+/// Working state of computeSCCsRec. Kept local to this file, because SCC.cpp
+/// defines a type with the same name.
+struct SCCData {
+  /// Discovery time of each node; UINT32_MAX means "not discovered yet"
+  llvm::SmallVector<uint32_t, 128> Disc;
+  /// Smallest discovery time that is known to be reachable from each node
+  llvm::SmallVector<uint32_t, 128> Low;
+  /// Marks the nodes that are currently held in Stack
+  llvm::SmallBitVector OnStack;
+  /// Discovered nodes that have not been assigned to an SCC yet
+  llvm::SmallVector<analysis::call_graph::TAGNodeId> Stack;
+  /// The next discovery time to hand out
+  uint32_t Time = 0;
+  /// Marks the nodes that have already been assigned to an SCC
+  llvm::SmallBitVector Seen;
+
+  explicit SCCData(size_t NumFuns)
+      : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns),
+        Seen(NumFuns) {}
+};
+} // namespace
+
+/// Lowers InOut to Other, if Other is smaller
+static void setMin(uint32_t &InOut, uint32_t Other) {
+  if (Other < InOut) {
+    InOut = Other;
+  }
+}
+
+/// Tarjan's SCC algorithm; explores everything that is reachable from
+/// CurrNode and records the SCCs it completes in Holder.
+// TODO: Non-recursive version
+static void computeSCCsRec(const analysis::call_graph::TypeAssignmentGraph &TAG,
+                           analysis::call_graph::TAGNodeId CurrNode,
+                           SCCData &Data,
+                           psr::analysis::call_graph::SCCHolder &Holder) {
+  // See
+  // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components
+
+  auto CurrTime = Data.Time++;
+  Data.Disc[size_t(CurrNode)] = CurrTime;
+  Data.Low[size_t(CurrNode)] = CurrTime;
+  Data.Stack.push_back(CurrNode);
+  Data.OnStack.set(uint32_t(CurrNode));
+
+  for (auto SuccNode : TAG.Adj[size_t(CurrNode)]) {
+    if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) {
+      // Tree-edge: Not seen yet --> recurse
+
+      computeSCCsRec(TAG, SuccNode, Data, Holder);
+      setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]);
+    } else if (Data.OnStack.test(uint32_t(SuccNode))) {
+      // Back-edge --> circle!
+
+      setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]);
+    }
+  }
+
+  if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) {
+    // Found SCC: everything on the stack down to CurrNode belongs to it
+
+    auto SCCIdx = SCCId(Holder.NumSCCs++);
+    auto &NodesInSCC = Holder.NodesInSCC.emplace_back();
+
+    assert(!Data.Stack.empty());
+
+    auto Member = CurrNode;
+    do {
+      Member = Data.Stack.pop_back_val();
+      Holder.SCCOfNode[size_t(Member)] = SCCIdx;
+      Data.OnStack.reset(uint32_t(Member));
+      Data.Seen.set(uint32_t(Member));
+      NodesInSCC.push_back(Member);
+    } while (Member != CurrNode);
+  }
+}
+
+auto analysis::call_graph::computeSCCs(const TypeAssignmentGraph &TAG)
+    -> SCCHolder {
+  SCCHolder Ret{};
+
+  auto NumNodes = TAG.Nodes.size();
+  Ret.SCCOfNode.resize(NumNodes);
+
+  if (!NumNodes) {
+    return Ret;
+  }
+
+  SCCData Data(NumNodes);
+  for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) {
+    // Nodes that were reached from an earlier start node already have an SCC
+    if (!Data.Seen.test(FunId)) {
+      computeSCCsRec(TAG, TAGNodeId(FunId), Data, Ret);
+    }
+  }
+
+  return Ret;
+}
+
+auto analysis::call_graph::computeSCCCallers(const TypeAssignmentGraph &TAG,
+                                             const SCCHolder &SCCs)
+    -> SCCCallers {
+  SCCCallers Ret;
+  Ret.ChildrenOfSCC.resize(SCCs.NumSCCs);
+
+  // Every SCC is a root, until an edge from another SCC into it is found
+  llvm::SmallBitVector Roots(SCCs.NumSCCs, true);
+
+  size_t NodeId = 0;
+  for (const auto &SuccNodes : TAG.Adj) {
+    auto SrcSCC = SCCs.SCCOfNode[NodeId];
+
+    for (auto SuccNode : SuccNodes) {
+      auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)];
+      // Edges within an SCC do not show up in the compressed graph
+      if (DestSCC != SrcSCC) {
+        Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC);
+        Roots.reset(uint32_t(DestSCC));
+      }
+    }
+
+    ++NodeId;
+  }
+
+  Ret.SCCRoots.reserve(Roots.count());
+  for (auto Rt : Roots.set_bits()) {
+    Ret.SCCRoots.push_back(SCCId(Rt));
+  }
+
+  return Ret;
+}
+
+void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS,
+                                             const SCCHolder &SCCs,
+                                             const TypeAssignmentGraph &TAG) {
+  OS << "digraph SCCTAG {\n";
+  psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; };
+
+  // One dot-node per SCC, labeled with all TAG nodes it contains
+  for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) {
+    OS << "  " << Ctr << "[label=\"";
+    for (auto TNId : SCCs.NodesInSCC[Ctr]) {
+      auto TN = TAG.Nodes[TNId];
+      printNode(OS, TN);
+      OS << "\\n";
+    }
+    OS << "\"];\n";
+  }
+
+  OS << '\n';
+
+  size_t Ctr = 0;
+  for (const auto &Targets : ChildrenOfSCC) {
+    for (auto Tgt : Targets) {
+      OS << "  " << Ctr << "->" << uint32_t(Tgt) << ";\n";
+    }
+    ++Ctr;
+  }
+}
+
+auto analysis::call_graph::computeSCCOrder(const SCCHolder &SCCs,
+                                           const SCCCallers &Callers)
+    -> SCCOrder {
+  SCCOrder Ret;
+  Ret.SCCIds.reserve(SCCs.NumSCCs);
+
+  llvm::SmallBitVector Seen;
+  Seen.resize(SCCs.NumSCCs);
+
+  // Post-order DFS over the compressed graph
+  auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void {
+    Seen.set(uint32_t(CurrSCC));
+    for (auto Child : Callers.ChildrenOfSCC[size_t(CurrSCC)]) {
+      if (!Seen.test(uint32_t(Child))) {
+        Dfs(Dfs, Child);
+      }
+    }
+    Ret.SCCIds.push_back(CurrSCC);
+  };
+
+  for (auto Root : Callers.SCCRoots) {
+    if (!Seen.test(uint32_t(Root))) {
+      Dfs(Dfs, Root);
+    }
+  }
+
+  // Reversed post-order: every SCC comes before its children
+  std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end());
+
+  return Ret;
+}
diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h
new file mode 100644
index 0000000000..179daaa658
--- /dev/null
+++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h
@@ -0,0 +1,72 @@
+/******************************************************************************
+ * Copyright (c) 2024 Fabian Schiebel.
+ * All rights reserved. This program and the accompanying materials are made
+ * available under the terms of LICENSE.txt.
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#pragma once + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include "../../../../../utils/include/phasar/Utils/Compressor.h" + +namespace psr { +class LLVMBasedICFG; +} // namespace psr + +namespace psr::analysis::call_graph { +// struct TypeAssignmentGraph; +enum class GraphNodeId : uint32_t; + +enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; + +struct SCCHolder { + llvm::SmallVector SCCOfNode{}; + llvm::SmallVector> NodesInSCC{}; + size_t NumSCCs = 0; +}; + +struct SCCCallers { + llvm::SmallVector, 0> ChildrenOfSCC{}; + llvm::SmallVector SCCRoots{}; + + template + void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph); +}; + +struct SCCOrder { + llvm::SmallVector SCCIds; +}; + +template +[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCHolder computeSCCs(const G &Graph); + +template +[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers +computeSCCCallers(const G &Graph, const SCCHolder &SCCs); + +[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder +computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); +} // namespace psr::analysis::call_graph + +namespace llvm { +template <> struct DenseMapInfo { + using SCCId = psr::analysis::call_graph::SCCId; + + static inline SCCId getEmptyKey() noexcept { return SCCId(-1); } + static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); } + static inline auto getHashValue(SCCId Id) noexcept { + return llvm::hash_value(uint32_t(Id)); + } + static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } +}; +} // namespace llvm diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp 
b/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp new file mode 100644 index 0000000000..af2447b372 --- /dev/null +++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp @@ -0,0 +1,698 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +// #include "TypeAssignmentGraph.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" + +#include "t2/analysis/FilteredAliasSet.h" +#include "t2/analysis/call_graph/AliasSets.h" + +#include +#include +#include +#include + +using namespace psr; +using namespace psr::analysis::call_graph; + +using TAGAliasHandler = llvm::function_ref; +using TAGAliasInfo = llvm::function_ref; + +static void printNodeImpl(llvm::raw_ostream &OS, Variable Var) { + OS << "var-"; + 
OS.write_escaped(psr::llvmIRToString(Var.Val)); +} + +static void printNodeImpl(llvm::raw_ostream &OS, Field Fld) { + OS << "fld-"; + OS.write_escaped(psr::llvmTypeToString(Fld.Base, true)); + OS << '+' << Fld.ByteOffset; +} + +static void printNodeImpl(llvm::raw_ostream &OS, Return Ret) { + OS << "ret-"; + OS.write_escaped(Ret.Fun->getName()); +} + +void analysis::call_graph::printNode(llvm::raw_ostream &OS, TAGNode TN) { + std::visit([&OS](auto Nod) { printNodeImpl(OS, Nod); }, TN.Label); +} + +static llvm::SmallBitVector +getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { + /// NOTE: Copied from SiLLiS + + llvm::SmallBitVector Ret; + + auto PointerSize = DL.getPointerSize(); + // LOGS("[getPointerIndicesOfType]: " << *Ty ); + auto MaxNumPointers = + !Ty->isSized() ? 1 : DL.getTypeAllocSize(Ty) / PointerSize; + if (!MaxNumPointers) { + return Ret; + } + Ret.resize(MaxNumPointers); + + llvm::SmallVector> WorkList = {{Ty, 0}}; + + while (!WorkList.empty()) { + auto [CurrTy, CurrByteOffs] = WorkList.pop_back_val(); + + if (CurrTy->isPointerTy()) { + size_t Idx = CurrByteOffs / PointerSize; + if (CurrByteOffs % PointerSize) [[unlikely]] { + llvm::errs() << "[WARNING][getPointerIndicesOfType]: Unaligned pointer " + "found at offset " + << CurrByteOffs << " in type " << *Ty; + } + assert(Ret.size() > Idx && + "reserved unsufficient space for pointer indices"); + Ret.set(Idx); + continue; + } + + if (CurrTy->isArrayTy()) { + auto *ElemTy = CurrTy->getArrayElementType(); + auto ArrayLen = CurrTy->getArrayNumElements(); + auto ElemSize = DL.getTypeAllocSize(ElemTy); + for (size_t I = 0, Offs = CurrByteOffs; I < ArrayLen; + ++I, Offs += ElemSize) { + WorkList.emplace_back(ElemTy, Offs); + } + continue; + } + + if (auto *Struct = llvm::dyn_cast(CurrTy)) { + auto NumElems = Struct->getNumElements(); + const auto *SL = DL.getStructLayout(Struct); + for (size_t I = 0; I < NumElems; ++I) { + auto Offs = CurrByteOffs + SL->getElementOffset(I); + 
WorkList.emplace_back(Struct->getElementType(I), Offs); + } + continue; + } + } + + return Ret; +} + +static void addTAGNode(TAGNode TN, TypeAssignmentGraph &TAG) { + TAG.Nodes.getOrInsert(TN); +} + +static void addFields(const llvm::Module &Mod, TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto &&Structs = Mod.getIdentifiedStructTypes(); + TAG.Nodes.reserve(TAG.Nodes.size() + Structs.size()); + + size_t PointerSize = DL.getPointerSize(); + + for (auto *ST : Structs) { + auto Offsets = getPointerIndicesOfType(ST, DL); + for (auto Offs : Offsets.set_bits()) { + addTAGNode({Field{ST, Offs * PointerSize}}, TAG); + } + addTAGNode({Field{ST, SIZE_MAX}}, TAG); + } +} + +static void addGlobals(const llvm::Module &Mod, TypeAssignmentGraph &TAG) { + auto NumGlobals = Mod.global_size(); + TAG.Nodes.reserve(TAG.Nodes.size() + NumGlobals); + + for (const auto &Glob : Mod.globals()) { + if (Glob.getValueType()->isIntOrIntVectorTy() || + Glob.getValueType()->isFloatingPointTy()) { + continue; + } + auto GlobName = Glob.getName(); + if (GlobName.startswith("_ZTV") || GlobName.startswith("_ZTI") || + GlobName.startswith("_ZTS")) { + continue; + } + + addTAGNode({Variable{&Glob}}, TAG); + } +} + +static void initializeWithFun(const llvm::Function *Fun, + TypeAssignmentGraph &TAG) { + // Add all params + // Add all locals + // Add return + + if (Fun->isDeclaration()) + return; + + for (const auto &Arg : Fun->args()) { + if (!Arg.getType()->isPointerTy()) + continue; + + addTAGNode({Variable{&Arg}}, TAG); + } + + for (const auto &I : llvm::instructions(Fun)) { + if (!I.getType()->isPointerTy()) { + // TODO: What about SSA structs that contain pointers? 
+ continue; + } + + if (const auto *Alloca = llvm::dyn_cast(&I)) { + if (Alloca->getAllocatedType()->isIntOrIntVectorTy() || + Alloca->getAllocatedType()->isFloatingPointTy()) { + continue; + } + } + + addTAGNode({Variable{&I}}, TAG); + } + + if (Fun->getReturnType() && Fun->getReturnType()->isPointerTy()) + addTAGNode({Return{Fun}}, TAG); +} + +[[nodiscard]] static bool isVTableOrFun(const llvm::Value *Val) { + const auto *Base = Val->stripPointerCastsAndAliases(); + if (llvm::isa(Base)) + return true; + + if (const auto *Glob = llvm::dyn_cast(Base)) + return Glob->isConstant() && Glob->getName().startswith("_ZTV"); + + return false; +} + +static void handleAlloca(const llvm::AllocaInst *Alloca, + TypeAssignmentGraph &TAG, + const psr::LLVMVFTableProvider &VTP) { + auto TN = TAG.get({Variable{Alloca}}); + if (!TN) + return; + + const auto *AllocTy = + llvm::dyn_cast(Alloca->getAllocatedType()); + if (!AllocTy) + return; + + if (const auto *TV = VTP.getVFTableGlobal(AllocTy)) { + TAG.TypeEntryPoints[*TN].insert(TV); + } +} + +static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto Offs = [&]() -> size_t { + llvm::APInt Offs(64, 0); + if (GEP->accumulateConstantOffset(DL, Offs)) { + return Offs.getZExtValue(); + } + return SIZE_MAX; + }(); + + return TAG.get({Field{GEP->getSourceElementType(), Offs}}); +} + +static void handleGEP(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { + auto To = TAG.get({Variable{GEP}}); + if (!To) + return; + + if (!GEP->isInBounds()) { + auto From = TAG.get({Variable{GEP->getPointerOperand()}}); + + if (From && To) + TAG.addEdge(*From, *To); + + return; + } + // TODO: Is this correct? 
-- also check load + + auto From = getGEPNode(GEP, TAG, DL); + if (From) + TAG.addEdge(*From, *To); +} + +static bool handleEntryForStore(const llvm::StoreInst *Store, + TypeAssignmentGraph &TAG, TAGAliasInfo AI, + const llvm::DataLayout &DL) { + const auto *Base = Store->getValueOperand()->stripPointerCastsAndAliases(); + bool IsEntry = isVTableOrFun(Base); + + if (!IsEntry) + return false; + + if (const auto *GEPDest = + llvm::dyn_cast(Store->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { + TAG.TypeEntryPoints[*GEPNodeId].insert(Base); + + auto GEPNode = TAG[*GEPNodeId]; + if (const auto *FldDest = std::get_if(&GEPNode.Label)) { + auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); + + if (ApproxDest) + TAG.TypeEntryPoints[*ApproxDest].insert(Base); + } + } + } + + AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { + // TODO: Fuse store and GEP! + + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) + return; + + TAG.TypeEntryPoints[*DestNodeId].insert(Base); + }); + return true; +} + +static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, + TAGAliasInfo AI, const llvm::DataLayout &DL) { + + if (handleEntryForStore(Store, TAG, AI, DL)) + return; + + auto From = TAG.get({Variable{Store->getValueOperand()}}); + if (!From) + return; + + if (const auto *GEPDest = + llvm::dyn_cast(Store->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { + TAG.addEdge(*From, *GEPNodeId); + + auto GEPNode = TAG[*GEPNodeId]; + if (const auto *FldDest = std::get_if(&GEPNode.Label)) { + auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); + + if (ApproxDest) + TAG.addEdge(*From, *ApproxDest); + } + } + } + + AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { + // TODO: Fuse store and GEP! 
+ + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) + return; + + TAG.addEdge(*From, *DestNodeId); + }); +} + +static void handleLoad(const llvm::LoadInst *Load, TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto To = TAG.get({Variable{Load}}); + if (!To) + return; + + auto From = TAG.get({Variable{Load->getPointerOperand()}}); + if (From) + TAG.addEdge(*From, *To); + + if (const auto *GEPDest = + llvm::dyn_cast(Load->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) + TAG.addEdge(*GEPNodeId, *To); + } +} + +static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { + auto To = TAG.get({Variable{Phi}}); + if (!To) + return; + + for (const auto &Inc : Phi->incoming_values()) { + auto From = TAG.get({Variable{Inc.get()}}); + if (From) + TAG.addEdge(*From, *To); + } +} + +static llvm::StringRef extractTypeName(llvm::StringRef CtorName) { + // Example: _ZN3OneC2Ev + + auto EndIdx = CtorName.rfind("C2E"); + if (EndIdx == llvm::StringRef::npos) + EndIdx = CtorName.rfind("C1E"); + + if (EndIdx == llvm::StringRef::npos) + EndIdx = CtorName.size(); + + auto StartIdx = EndIdx; + while (StartIdx) { + --StartIdx; + + if (llvm::isDigit(CtorName[StartIdx])) + break; + } + return CtorName.slice(StartIdx, EndIdx); +} +static llvm::StringRef extractTypeName(std::string &&) = delete; + +static const llvm::Value *getTypeFromDI(const llvm::DICompositeType *CompTy, + const llvm::Module &Mod, + const psr::LLVMVFTableProvider &VTP) { + if (!CompTy->getIdentifier().empty()) { + + std::string Buf; + auto TypeName = CompTy->getIdentifier(); + if (TypeName.startswith("_ZTS") || TypeName.startswith("_ZTI")) { + Buf = TypeName.str(); + Buf[3] = 'V'; + TypeName = Buf; + } + + if (const auto *GlobTV = Mod.getNamedGlobal(TypeName)) { + return GlobTV; + } + if (const auto *Alias = Mod.getNamedAlias(TypeName)) { + return Alias->getAliasee()->stripPointerCastsAndAliases(); + } + + return nullptr; + } + + auto ClearName = 
CompTy->getName().str(); + const auto *Scope = CompTy->getScope(); + while (llvm::isa_and_nonnull(Scope)) { + ClearName = Scope->getName().str().append("::").append(ClearName); + Scope = Scope->getScope(); + } + + return VTP.getVFTableGlobal(ClearName); +} + +static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, + TypeAssignmentGraph &TAG, + const llvm::Function *Callee, + const psr::LLVMVFTableProvider &VTP) { + + if (!psr::isHeapAllocatingFunction(Callee)) + return; + + if (const auto *MDNode = Call->getMetadata("heapallocsite")) { + + // Shortcut + if (const auto *CompTy = llvm::dyn_cast(MDNode); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + if (const auto *Ty = getTypeFromDI(CompTy, *Call->getModule(), VTP)) { + + TAG.TypeEntryPoints[CSNod].insert(Ty); + return; + } + } + } + // TODO: Fallback solution + + // llvm::SmallDenseSet Seen; + // llvm::SmallVector WL = {Call}; + + // // Search for the ctor call + + // const auto *CallerFun = Call->getFunction(); + + // while (!WL.empty()) { + // const auto *CurrObj = WL.pop_back_val(); + // for (const auto &Use : CurrObj->uses()) { + // const auto *User = llvm::dyn_cast(Use.getUser()); + // if (!User || User->getFunction() != CallerFun) + // continue; + + // if (const auto *Cast = llvm::dyn_cast(User); + // Cast && Cast->getDestTy()->isPointerTy()) { + // if (Seen.insert(Cast).second) + // WL.push_back(Cast); + + // continue; + // } + + // if (const auto *CtorCall = llvm::dyn_cast(User); + // CtorCall && CtorCall->getCalledFunction() && + // Use == CtorCall->getArgOperand(0)) { + // auto CtorName = CtorCall->getCalledFunction()->getName(); + // if (psr::isConstructor(CtorName)) { + // auto DemangledCtorName = llvm::demangle(CtorName.str()); + + // auto TypeName = extractTypeName(CtorName); + + // // TODO + // } + // // TODO: Extract type from ctor fun + // } + // } + // } +} + +static void handleCall(const 
llvm::CallBase *Call, TypeAssignmentGraph &TAG, + const psr::CallGraph &BaseCG, + const psr::LLVMVFTableProvider &VTP) { + + llvm::SmallVector> Args; + llvm::SmallBitVector EntryArgs; + bool HasArgNode = false; + + for (const auto &Arg : Call->args()) { + auto TN = TAG.get({Variable{Arg.get()}}); + Args.push_back(TN); + if (TN) + HasArgNode = true; + + bool IsEntry = isVTableOrFun(Arg.get()); + EntryArgs.push_back(IsEntry); + } + + auto CSNod = TAG.get({Variable{Call}}); + + // TODO: Handle struct returns that contain pointers + if (!HasArgNode && !CSNod) + return; + + for (const auto *Callee : BaseCG.getCalleesOfCallAt(Call)) { + handleEntryForCall(Call, *CSNod, TAG, Callee, VTP); + + for (const auto &[Param, Arg] : llvm::zip(Callee->args(), Args)) { + auto ParamNodId = TAG.get({Variable{&Param}}); + if (!ParamNodId) + continue; + + if (EntryArgs.test(Param.getArgNo())) { + TAG.TypeEntryPoints[*ParamNodId].insert( + Call->getArgOperand(Param.getArgNo()) + ->stripPointerCastsAndAliases()); + } + + if (!Arg) + continue; + + if (!Param.hasStructRetAttr()) + TAG.addEdge(*Arg, *ParamNodId); + + // if (!Param.hasByValAttr()) + // TAG.addEdge(*ParamNodId, *Arg); + } + if (CSNod) { + auto RetNod = TAG.get({Return{Callee}}); + if (RetNod) + TAG.addEdge(*RetNod, *CSNod); + } + } +} + +static void handleReturn(const llvm::ReturnInst *Ret, + TypeAssignmentGraph &TAG) { + + auto TNId = TAG.get({Return{Ret->getFunction()}}); + if (!TNId) + return; + + if (const auto *RetVal = Ret->getReturnValue()) { + const auto *Base = RetVal->stripPointerCastsAndAliases(); + if (isVTableOrFun(Base)) { + TAG.TypeEntryPoints[*TNId].insert(Base); + return; + } + + auto From = TAG.get({Variable{Base}}); + if (From) + TAG.addEdge(*From, *TNId); + } +} + +static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, + const psr::CallGraph &BaseCG, + TAGAliasInfo AI, const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { + if (const auto *Alloca = llvm::dyn_cast(&I)) { + 
handleAlloca(Alloca, TAG, VTP); + return; + } + if (const auto *Load = llvm::dyn_cast(&I)) { + handleLoad(Load, TAG, DL); + return; + } + if (const auto *GEP = llvm::dyn_cast(&I)) { + handleGEP(GEP, TAG, DL); + return; + } + if (const auto *Store = llvm::dyn_cast(&I)) { + handleStore(Store, TAG, AI, DL); + return; + } + if (const auto *Phi = llvm::dyn_cast(&I)) { + handlePhi(Phi, TAG); + return; + } + if (const auto *Cast = llvm::dyn_cast(&I)) { + auto From = TAG.get({Variable{Cast->getOperand(0)}}); + auto To = TAG.get({Variable{Cast}}); + + if (From && To) + TAG.addEdge(*From, *To); + } + if (const auto *Call = llvm::dyn_cast(&I)) { + handleCall(Call, TAG, BaseCG, VTP); + return; + } + if (const auto *Ret = llvm::dyn_cast(&I)) { + handleReturn(Ret, TAG); + return; + } + // TODO: Handle more cases +} + +static void buildTAGWithFun( + const llvm::Function *Fun, TypeAssignmentGraph &TAG, + const psr::CallGraph + &BaseCG, + TAGAliasInfo AI, const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { + for (const auto &I : llvm::instructions(Fun)) { + dispatch(I, TAG, BaseCG, AI, DL, VTP); + } +} + +static auto computeTypeAssignmentGraphImpl( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + TAGAliasInfo AI, const psr::LLVMVFTableProvider &VTP) + -> TypeAssignmentGraph { + TypeAssignmentGraph TAG; + + const auto &DL = Mod.getDataLayout(); + + addFields(Mod, TAG, DL); + addGlobals(Mod, TAG); + + for (const auto &Fun : Mod) { + initializeWithFun(&Fun, TAG); + } + + TAG.Adj.resize(TAG.Nodes.size()); + + for (const auto &Fun : Mod) { + buildTAGWithFun(&Fun, TAG, BaseCG, AI, DL, VTP); + } + + return TAG; +} + +auto analysis::call_graph::computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP) + -> TypeAssignmentGraph { + FilteredAliasSet FAS(AS); + return computeTypeAssignmentGraphImpl( + Mod, BaseCG, + [&FAS](const auto *Fact, const auto *At, 
TAGAliasHandler Handler) { + FAS.foreachAlias(Fact, At, Handler); + }, + VTP); +} + +auto analysis::call_graph::computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP) + -> TypeAssignmentGraph { + AliasInfo AI(&ObjGraph); + FilteredAliasSet FAS(AI.aliases()); + return computeTypeAssignmentGraphImpl( + Mod, BaseCG, + [&FAS](const auto *Fact, const auto *At, TAGAliasHandler Handler) { + FAS.foreachAlias(Fact, At, Handler); + }, + VTP); +} + +void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { + OS << "digraph TAG {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + + size_t Ctr = 0; + for (const auto &TN : Nodes) { + OS << " " << Ctr << "[label=\""; + printNode(OS, TN); + OS << "\"];\n"; + + ++Ctr; + } + + OS << '\n'; + + Ctr = 0; + for (const auto &Targets : Adj) { + for (auto Tgt : Targets) { + OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; + } + ++Ctr; + } +} diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h b/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h new file mode 100644 index 0000000000..1022fe00b7 --- /dev/null +++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h @@ -0,0 +1,150 @@ +#pragma once + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include "../../../../../utils/include/phasar/Utils/Compressor.h" +#include "../ControlFlow/CallGraph.h" +#include "../ControlFlow/LLVMVFTableProvider.h" +#include "../Pointer/LLVMAliasInfo.h" +#include "../TypeHierarchy/LLVMTypeHierarchy.h" + +#include +#include + +namespace psr { +class FilteredAliasSet; +} // namespace psr + 
+namespace psr::analysis::call_graph { + +enum class [[clang::enum_extensibility(open)]] TAGNodeId : uint32_t{}; + +struct Variable { + const llvm::Value *Val; +}; + +struct Field { + const llvm::Type *Base; + size_t ByteOffset; +}; + +struct Return { + const llvm::Function *Fun; +}; + +struct TAGNode { + std::variant Label; +}; + +constexpr bool operator==(Variable L, Variable R) noexcept { + return L.Val == R.Val; +} +constexpr bool operator==(Field L, Field R) noexcept { + return L.Base == R.Base && L.ByteOffset == R.ByteOffset; +} +constexpr bool operator==(Return L, Return R) noexcept { + return L.Fun == R.Fun; +} +constexpr bool operator==(TAGNode L, TAGNode R) noexcept { + return L.Label == R.Label; +} +}; // namespace psr::analysis::call_graph + +namespace llvm { +template <> struct DenseMapInfo { + using TAGNode = psr::analysis::call_graph::TAGNode; + using Variable = psr::analysis::call_graph::Variable; + using Field = psr::analysis::call_graph::Field; + using Return = psr::analysis::call_graph::Return; + + inline static TAGNode getEmptyKey() noexcept { + return {Variable{llvm::DenseMapInfo::getEmptyKey()}}; + } + inline static TAGNode getTombstoneKey() noexcept { + return { + Variable{llvm::DenseMapInfo::getTombstoneKey()}}; + } + inline static bool isEqual(TAGNode L, TAGNode R) noexcept { return L == R; } + inline static auto getHashValue(TAGNode TN) noexcept { + if (const auto *Var = std::get_if(&TN.Label)) { + return llvm::hash_combine(0, Var->Val); + } + if (const auto *Fld = std::get_if(&TN.Label)) { + return llvm::hash_combine(1, Fld->Base, Fld->ByteOffset); + } + if (const auto *Ret = std::get_if(&TN.Label)) { + return llvm::hash_combine(2, Ret->Fun); + } + llvm_unreachable("All TAGNode variants should be handled already"); + } +}; + +template <> struct DenseMapInfo { + using TAGNodeId = psr::analysis::call_graph::TAGNodeId; + inline static TAGNodeId getEmptyKey() noexcept { return TAGNodeId(-1); } + inline static TAGNodeId getTombstoneKey() 
noexcept { return TAGNodeId(-2); } + inline static bool isEqual(TAGNodeId L, TAGNodeId R) noexcept { + return L == R; + } + inline static auto getHashValue(TAGNodeId TN) noexcept { + return llvm::hash_value(uint32_t(TN)); + } +}; + +} // namespace llvm + +namespace psr::analysis::call_graph { +struct ObjectGraph; + +struct TypeAssignmentGraph { + + Compressor Nodes; + + llvm::SmallVector, 0> Adj; + llvm::SmallDenseMap> + TypeEntryPoints; + + [[nodiscard]] inline std::optional get(TAGNode TN) const noexcept { + return Nodes.getOrNull(TN); + } + + [[nodiscard]] inline TAGNode operator[](TAGNodeId Id) const noexcept { + return Nodes[Id]; + } + + inline void addEdge(TAGNodeId From, TAGNodeId To) { + assert(size_t(From) < Adj.size()); + assert(size_t(To) < Adj.size()); + + if (From == To) + return; + + Adj[size_t(From)].insert(To); + } + + void print(llvm::raw_ostream &OS); +}; + +[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP); + +[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP); + +void printNode(llvm::raw_ostream &OS, TAGNode TN); +}; // namespace psr::analysis::call_graph diff --git a/phasar/utils/include/phasar/Utils/Compressor.h b/phasar/utils/include/phasar/Utils/Compressor.h new file mode 100644 index 0000000000..edf87095e5 --- /dev/null +++ b/phasar/utils/include/phasar/Utils/Compressor.h @@ -0,0 +1,212 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#pragma once + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallVector.h" + +#include "ByRef.h" +#include "TypeTraits.h" + +#include +#include +#include +#include +#include + +namespace psr { +template +class Compressor; + +template +class Compressor>> { +public: + void reserve(size_t Capacity) { + assert(Capacity <= UINT32_MAX); + ToInt.reserve(Capacity); + FromInt.reserve(Capacity); + } + + Id getOrInsert(T Elem) { + auto [It, Inserted] = ToInt.try_emplace(Elem, Id(ToInt.size())); + if (Inserted) { + FromInt.push_back(Elem); + } + return It->second; + } + + std::pair insert(T Elem) { + auto [It, Inserted] = ToInt.try_emplace(Elem, Id(ToInt.size())); + if (Inserted) { + FromInt.push_back(Elem); + } + return {It->second, Inserted}; + } + + [[nodiscard]] std::optional getOrNull(T Elem) const { + if (auto It = ToInt.find(Elem); It != ToInt.end()) { + return It->second; + } + return std::nullopt; + } + + [[nodiscard]] Id get(T Elem) const { + auto It = ToInt.find(Elem); + assert(It != ToInt.end()); + return It->second; + } + + [[nodiscard]] T operator[](Id Idx) const noexcept { + assert(size_t(Idx) < FromInt.size()); + return FromInt[size_t(Idx)]; + } + + [[nodiscard]] size_t size() const noexcept { return FromInt.size(); } + [[nodiscard]] size_t capacity() const noexcept { + return FromInt.capacity() + + ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type); + } + + auto begin() const noexcept { return FromInt.begin(); } + auto end() const noexcept { return FromInt.end(); } + +private: + llvm::DenseMap ToInt; + llvm::SmallVector FromInt; +}; + +template +class Compressor>> { +public: + void reserve(size_t Capacity) { + assert(Capacity <= UINT32_MAX); + ToInt.reserve(Capacity); + } + + Id getOrInsert(const T &Elem) { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + 
return It->second; + } + auto Ret = Id(FromInt.size()); + auto *Ins = &FromInt.emplace_back(Elem); + ToInt[Ins] = Ret; + return Ret; + } + + Id getOrInsert(T &&Elem) { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + return It->second; + } + auto Ret = Id(FromInt.size()); + auto *Ins = &FromInt.emplace_back(std::move(Elem)); + ToInt[Ins] = Ret; + return Ret; + } + + std::pair insert(const T &Elem) { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + return {It->second, false}; + } + auto Ret = Id(FromInt.size()); + auto *Ins = &FromInt.emplace_back(Elem); + ToInt[Ins] = Ret; + return {Ret, true}; + } + + std::pair insert(T &&Elem) { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + return {It->second, false}; + } + auto Ret = Id(FromInt.size()); + auto *Ins = &FromInt.emplace_back(std::move(Elem)); + ToInt[Ins] = Ret; + return {Ret, true}; + } + + [[nodiscard]] std::optional getOrNull(const T &Elem) const { + if (auto It = ToInt.find(&Elem); It != ToInt.end()) { + return It->second; + } + return std::nullopt; + } + + [[nodiscard]] Id get(const T &Elem) const { + auto It = ToInt.find(&Elem); + assert(It != ToInt.end()); + return It->second; + } + + const T &operator[](Id Idx) const noexcept { + assert(size_t(Idx) < FromInt.size()); + return FromInt[size_t(Idx)]; + } + + [[nodiscard]] size_t size() const noexcept { return FromInt.size(); } + [[nodiscard]] size_t capacity() const noexcept { + return FromInt.size() + + ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type); + } + + auto begin() const noexcept { return FromInt.begin(); } + auto end() const noexcept { return FromInt.end(); } + +private: + struct DSI : llvm::DenseMapInfo { + static auto getHashValue(const T *Elem) noexcept { + assert(Elem != nullptr); + if constexpr (has_llvm_dense_map_info) { + return llvm::DenseMapInfo::getHashValue(*Elem); + } else { + return std::hash{}(*Elem); + } + } + static auto isEqual(const T *LHS, const T *RHS) noexcept { + if (LHS == RHS) 
{ + return true; + } + if (LHS == DSI::getEmptyKey() || LHS == DSI::getTombstoneKey() || + RHS == DSI::getEmptyKey() || RHS == DSI::getTombstoneKey()) { + return false; + } + if constexpr (has_llvm_dense_map_info) { + return llvm::DenseMapInfo::isEqual(*LHS, *RHS); + } else { + return *LHS == *RHS; + } + } + }; + + std::deque FromInt; + llvm::DenseMap ToInt; +}; + +struct NoneCompressor final { + constexpr NoneCompressor() noexcept = default; + + template >> + constexpr NoneCompressor(const T & /*unused*/) noexcept {} + + template + [[nodiscard]] decltype(auto) getOrInsert(T &&Val) const noexcept { + return std::forward(Val); + } + template + [[nodiscard]] decltype(auto) operator[](T &&Val) const noexcept { + return std::forward(Val); + } + void reserve(size_t /*unused*/) const noexcept {} + + [[nodiscard]] size_t size() const noexcept { return 0; } + [[nodiscard]] size_t capacity() const noexcept { return 0; } +}; + +} // namespace psr From 1cd7a86b1a3324810cf46a433f7bc504f0e3bee5 Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Fri, 23 Aug 2024 12:11:05 +0200 Subject: [PATCH 02/27] SCCGeneric.h finished --- .../PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h index 6697347d75..0356f6ea39 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h @@ -11,6 +11,8 @@ // #include "phasar/PhasarLLVM/Utils/Compressor.h" +#include "phasar/Utils/Utilities.h" + #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" @@ -227,12 +229,124 @@ template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { return Ret; } +// choose which Tarjan implementation will be executed +template +[[nodiscard]] SCCHolder execTarjan(const G &Graph, const bool Iterative) { + SCCHolder Ret{}; + + 
auto NumNodes = Graph.Nodes.size(); + Ret.SCCOfNode.resize(NumNodes); + + if (!NumNodes) { + return Ret; + } + + SCCData Data(NumNodes); + for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { + if (!Data.Seen.test(FunId)) { + if (Iterative) { + TarjanIt(Graph, GraphNodeId(FunId), Data, Ret); + } else { + computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + } + } + } + + return Ret; +} + template [[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers computeSCCCallers(const G &Graph, const SCCHolder &SCCs); +template +auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { + SCCCallers Ret; + Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); + + llvm::SmallBitVector Roots(SCCs.NumSCCs, true); + + size_t NodeId = 0; + for (const auto &SuccNodes : Graph.Adj) { + auto SrcSCC = SCCs.SCCOfNode[NodeId]; + + for (auto SuccNode : SuccNodes) { + auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)]; + if (DestSCC != SrcSCC) { + Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC); + Roots.reset(uint32_t(DestSCC)); + } + } + + ++NodeId; + } + + Ret.SCCRoots.reserve(Roots.count()); + for (auto Rt : Roots.set_bits()) { + Ret.SCCRoots.push_back(SCCId(Rt)); + } + + return Ret; +} + +template +void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS, + const SCCHolder &SCCs, + const G &Graph) { + OS << "digraph SCCTAG {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { + OS << " " << Ctr << "[label=\""; + for (auto TNId : SCCs.NodesInSCC[Ctr]) { + auto TN = Graph.Nodes[TNId]; + printNode(OS, TN); + OS << "\\n"; + } + OS << "\"];\n"; + } + + OS << '\n'; + + size_t Ctr = 0; + for (const auto &Targets : ChildrenOfSCC) { + for (auto Tgt : Targets) { + OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; + } + ++Ctr; + } +} + [[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); + +inline auto computeSCCOrder(const SCCHolder &SCCs, const SCCCallers 
&Callers) + -> SCCOrder { + SCCOrder Ret; + Ret.SCCIds.reserve(SCCs.NumSCCs); + + llvm::SmallBitVector Seen; + Seen.resize(SCCs.NumSCCs); + + auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { + Seen.set(uint32_t(CurrSCC)); + for (auto Caller : Callers.ChildrenOfSCC[size_t(CurrSCC)]) { + if (!Seen.test(uint32_t(Caller))) { + Dfs(Dfs, Caller); + } + } + Ret.SCCIds.push_back(CurrSCC); + }; + + for (auto Leaf : Callers.SCCRoots) { + if (!Seen.test(uint32_t(Leaf))) { + Dfs(Dfs, Leaf); + } + } + + std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end()); + + return Ret; +} } // namespace psr::analysis::call_graph namespace llvm { From 5fdfc560a3912d481f49537ecd4fdf2d6b01f106 Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Fri, 23 Aug 2024 14:53:46 +0200 Subject: [PATCH 03/27] typeAssignmentGraph ported --- .../PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 7 +- include/phasar/PhasarLLVM/Utils/AliasSets.h | 41 ++ .../PhasarLLVM/Utils/FilteredAliasSet.h | 66 ++ .../PhasarLLVM/Utils/TypeAssignmentGraph.h | 162 ++++ .../phasar/PhasarLLVM/Utils/TypePropagator.h | 39 + .../DataFlow/IfdsIde/SCCGeneric.cpp | 60 ++ lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 336 +++++++++ .../DataFlow/IfdsIde/TypeAssignmentGraph.cpp | 697 ++++++++++++++++++ .../DataFlow/IfdsIde/TypePropagator.cpp | 84 +++ .../phasar/PhasarLLVM/Utils/SCCGeneric.cpp | 5 +- 10 files changed, 1491 insertions(+), 6 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Utils/AliasSets.h create mode 100644 include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h create mode 100644 include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h create mode 100644 include/phasar/PhasarLLVM/Utils/TypePropagator.h create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp diff --git 
a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h index 0356f6ea39..69e9fa2ade 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h @@ -8,8 +8,8 @@ *****************************************************************************/ #pragma once - -// #include "phasar/PhasarLLVM/Utils/Compressor.h" +// error in included header Compressor.h +// #include "phasar/PhasarLLVM/Utils/Compressor.h" #include "phasar/Utils/Utilities.h" @@ -142,8 +142,7 @@ static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, // Iterative IMplementation for Tarjan's SCC Alg. // -> Heapoverflow through simulated Stack? template -static void TarjanIt(const G &Graph, GraphNodeId StartNode, SCCDataIt &Data, - SCCHolder &Holder) { +static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { auto CurrTime = Data.Time; for (uint32_t Vertex = 0; Vertex < Graph.Nodes.size(); Vertex++) { diff --git a/include/phasar/PhasarLLVM/Utils/AliasSets.h b/include/phasar/PhasarLLVM/Utils/AliasSets.h new file mode 100644 index 0000000000..dc37c74752 --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/AliasSets.h @@ -0,0 +1,41 @@ +#pragma once + +#include "phasar/Utils/StableVector.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/IR/Value.h" + +namespace psr::analysis::call_graph { +struct ObjectGraph; + +struct AliasSets { + using AliasSetTy = llvm::SmallDenseSet; + + psr::StableVector AliasSetOwner{}; + llvm::SmallVector AliasSetMap{}; + + void print(llvm::raw_ostream &OS, const ObjectGraph &Graph) const; +}; + +class AliasInfo { +public: + explicit AliasInfo(const ObjectGraph *Graph) noexcept : Graph(Graph) { + assert(Graph != nullptr); + } + + auto aliases() { + return [this](const llvm::Value *Fact, + llvm::function_ref WithAlias) { + return foreachAlias(Fact, WithAlias); + }; + }; + 
+private: + void foreachAlias(const llvm::Value *Fact, + llvm::function_ref WithAlias); + + const ObjectGraph *Graph{}; +}; + +} // namespace psr::analysis::call_graph diff --git a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h new file mode 100644 index 0000000000..eb62bf077f --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h @@ -0,0 +1,66 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +// #include "TypeAssignmentGraph.h" + +#pragma once + +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" + +#include + +namespace llvm { +class Value; +class Instruction; +} // namespace llvm + +namespace psr { +// Belongs into phasar! 
+class FilteredAliasSet { +public: + using d_t = const llvm::Value *; + using n_t = const llvm::Instruction *; + using container_type = std::set; + + using alias_handler_t = llvm::function_ref; + using alias_info_ref_t = + llvm::function_ref; + using alias_info_t = + llvm::unique_function; + + FilteredAliasSet(alias_info_t &&PT) noexcept : PT(std::move(PT)) { + assert(this->PT); + } + + explicit FilteredAliasSet(psr::LLVMAliasInfoRef AS) + : PT([AS](const llvm::Value *Fact, auto Handler) { + for (const auto *Alias : *AS.getAliasSet(Fact)) { + Handler(Alias); + } + }) {} + + [[nodiscard]] container_type getAliasSet(d_t Val, n_t At); + [[nodiscard]] container_type getMustAliasSet(d_t Val, n_t At) { + return {Val}; + } + + void foreachAlias(d_t Fact, n_t At, llvm::function_ref WithAlias); + void foreachMustAlias(d_t Fact, n_t At, + llvm::function_ref WithAlias) { + WithAlias(Fact); + } + +private: + alias_info_t PT; +}; +} // namespace psr diff --git a/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h new file mode 100644 index 0000000000..c652eb24e9 --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h @@ -0,0 +1,162 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#pragma once + +#include "phasar/ControlFlow/CallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/Compressor.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace psr { +class FilteredAliasSet; +} // namespace psr + +namespace psr::analysis::call_graph { + +enum class [[clang::enum_extensibility(open)]] GraphNodeId : uint32_t{}; + +struct Variable { + const llvm::Value *Val; +}; + +struct Field { + const llvm::Type *Base; + size_t ByteOffset; +}; + +struct Return { + const llvm::Function *Fun; +}; + +struct TAGNode { + std::variant Label; +}; + +constexpr bool operator==(Variable L, Variable R) noexcept { + return L.Val == R.Val; +} +constexpr bool operator==(Field L, Field R) noexcept { + return L.Base == R.Base && L.ByteOffset == R.ByteOffset; +} +constexpr bool operator==(Return L, Return R) noexcept { + return L.Fun == R.Fun; +} +constexpr bool operator==(TAGNode L, TAGNode R) noexcept { + return L.Label == R.Label; +} +}; // namespace psr::analysis::call_graph + +namespace llvm { +template <> struct DenseMapInfo { + using TAGNode = psr::analysis::call_graph::TAGNode; + using Variable = psr::analysis::call_graph::Variable; + using Field = psr::analysis::call_graph::Field; + using Return = psr::analysis::call_graph::Return; + + inline static TAGNode getEmptyKey() noexcept { + return 
{Variable{llvm::DenseMapInfo::getEmptyKey()}}; + } + inline static TAGNode getTombstoneKey() noexcept { + return { + Variable{llvm::DenseMapInfo::getTombstoneKey()}}; + } + inline static bool isEqual(TAGNode L, TAGNode R) noexcept { return L == R; } + inline static auto getHashValue(TAGNode TN) noexcept { + if (const auto *Var = std::get_if(&TN.Label)) { + return llvm::hash_combine(0, Var->Val); + } + if (const auto *Fld = std::get_if(&TN.Label)) { + return llvm::hash_combine(1, Fld->Base, Fld->ByteOffset); + } + if (const auto *Ret = std::get_if(&TN.Label)) { + return llvm::hash_combine(2, Ret->Fun); + } + llvm_unreachable("All TAGNode variants should be handled already"); + } +}; + +template <> struct DenseMapInfo { + using GraphNodeId = psr::analysis::call_graph::GraphNodeId; + inline static GraphNodeId getEmptyKey() noexcept { return GraphNodeId(-1); } + inline static GraphNodeId getTombstoneKey() noexcept { + return GraphNodeId(-2); + } + inline static bool isEqual(GraphNodeId L, GraphNodeId R) noexcept { + return L == R; + } + inline static auto getHashValue(GraphNodeId TN) noexcept { + return llvm::hash_value(uint32_t(TN)); + } +}; + +} // namespace llvm + +namespace psr::analysis::call_graph { +struct ObjectGraph; + +struct TypeAssignmentGraph { + + Compressor Nodes; + + llvm::SmallVector, 0> Adj; + llvm::SmallDenseMap> + TypeEntryPoints; + + [[nodiscard]] inline std::optional + get(TAGNode TN) const noexcept { + return Nodes.getOrNull(TN); + } + + [[nodiscard]] inline TAGNode operator[](GraphNodeId Id) const noexcept { + return Nodes[Id]; + } + + inline void addEdge(GraphNodeId From, GraphNodeId To) { + assert(size_t(From) < Adj.size()); + assert(size_t(To) < Adj.size()); + + if (From == To) + return; + + Adj[size_t(From)].insert(To); + } + + void print(llvm::raw_ostream &OS); +}; + +[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + psr::LLVMAliasInfoRef AS, const 
psr::LLVMVFTableProvider &VTP); + +[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP); + +void printNode(llvm::raw_ostream &OS, TAGNode TN); +}; // namespace psr::analysis::call_graph diff --git a/include/phasar/PhasarLLVM/Utils/TypePropagator.h b/include/phasar/PhasarLLVM/Utils/TypePropagator.h new file mode 100644 index 0000000000..7984d80a4e --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/TypePropagator.h @@ -0,0 +1,39 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#pragma once + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +class Value; +} // namespace llvm + +namespace psr::analysis::call_graph { +struct TypeAssignmentGraph; +struct SCCHolder; +struct SCCCallers; +struct SCCOrder; + +struct TypeAssignment { + llvm::SmallVector, 0> TypesPerSCC; + + LLVM_LIBRARY_VISIBILITY void print(llvm::raw_ostream &OS, + const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs); +}; + +[[nodiscard]] LLVM_LIBRARY_VISIBILITY TypeAssignment +propagateTypes(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, + const SCCCallers &Deps, const SCCOrder &Order); + +} // namespace psr::analysis::call_graph diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp new file mode 100644 index 0000000000..f973b1216c --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp @@ -0,0 +1,60 @@ 
+/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +// #include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" +#include "SCCGeneric.h" + +// #include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" + +#include + +// #include "phasar/PhasarLLVM/Utils/Compressor.h" +// #include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" + +#include "llvm/ADT/SmallBitVector.h" + +#include +#include +#include + +using namespace psr; + +using SCCId = analysis::call_graph::SCCId; + +class ExampleGraph { +public: + ExampleGraph() = default; + + std::vector + getEdges(const analysis::call_graph::GraphNodeId ID) const { + return Adj[uint32_t(ID)]; + } + std::vector> Adj; +}; + +int main() { + ExampleGraph Gr; + std::vector> list = { + {analysis::call_graph::GraphNodeId(2)}, + {analysis::call_graph::GraphNodeId(0)}, + {analysis::call_graph::GraphNodeId(1)}, + {analysis::call_graph::GraphNodeId(1), + analysis::call_graph::GraphNodeId(2)}, + {analysis::call_graph::GraphNodeId(1)}, + {analysis::call_graph::GraphNodeId(4), + analysis::call_graph::GraphNodeId(6)}, + {analysis::call_graph::GraphNodeId(4), + analysis::call_graph::GraphNodeId(7)}, + {analysis::call_graph::GraphNodeId(5)}}; + + auto Output = analysis::call_graph::execTarjan(Gr, false); + std::cout << Output.NumSCCs; + auto Out = analysis::call_graph::execTarjan(Gr, true); + std::cout << Out.NumSCCs; +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h new file mode 100644 index 0000000000..c4248e3e7c --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h @@ -0,0 +1,336 @@ 
+/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#pragma once +// error in included header Compressor.h +// #include "phasar/PhasarLLVM/Utils/Compressor.h" + +// #include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace psr { +class LLVMBasedICFG; +} // namespace psr + +namespace psr::analysis::call_graph { +// struct TypeAssignmentGraph; +enum class GraphNodeId : uint32_t; + +enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; + +// holds the scc's of a given graph +struct SCCHolder { + llvm::SmallVector SCCOfNode{}; + llvm::SmallVector> NodesInSCC{}; + size_t NumSCCs = 0; +}; + +// holds a graph were the scc's are compressed to a single node. 
Resulting graph +// is a DAG +struct SCCCallers { + llvm::SmallVector, 0> ChildrenOfSCC{}; + llvm::SmallVector SCCRoots{}; + + template + void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph); +}; + +// holds topologically sorted scccallers +struct SCCOrder { + llvm::SmallVector SCCIds; +}; + +struct SCCData { + llvm::SmallVector Disc; + llvm::SmallVector Low; + llvm::SmallBitVector OnStack; + llvm::SmallVector Stack; + uint32_t Time = 0; + llvm::SmallBitVector Seen; + + explicit SCCData(size_t NumFuns) + : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), + Seen(NumFuns) {} +}; + +struct SCCDataIt { + llvm::SmallVector Disc; + llvm::SmallVector Low; + llvm::SmallBitVector OnStack; + llvm::SmallVector Stack; + llvm::SmallVector> CallStack; + uint32_t Time = 0; + llvm::SmallBitVector Seen; + + explicit SCCDataIt(size_t NumFuns) + : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), + Seen(NumFuns) {} +}; + +static void setMin(uint32_t &InOut, uint32_t Other) { + if (Other < InOut) { + InOut = Other; + } +} + +// TODO: Non-recursive version +template +static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, + SCCHolder &Holder) { + // See + // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components + + auto CurrTime = Data.Time++; + Data.Disc[size_t(CurrNode)] = CurrTime; + Data.Low[size_t(CurrNode)] = CurrTime; + Data.Stack.push_back(CurrNode); + Data.OnStack.set(uint32_t(CurrNode)); + + for (auto SuccNode : Graph.Adj[size_t(CurrNode)]) { + if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) { + // Tree-edge: Not seen yet --> recurse + + computeSCCsRec(Graph, SuccNode, Data, Holder); + setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]); + } else if (Data.OnStack.test(uint32_t(SuccNode))) { + // Back-edge --> circle! 
+ + setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]); + } + } + + if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) { + // Found SCC + + auto SCCIdx = SCCId(Holder.NumSCCs++); + auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); + + assert(!Data.Stack.empty()); + + while (Data.Stack.back() != CurrNode) { + auto Fun = Data.Stack.pop_back_val(); + Holder.SCCOfNode[size_t(Fun)] = SCCIdx; + Data.OnStack.reset(uint32_t(Fun)); + Data.Seen.set(uint32_t(Fun)); + NodesInSCC.push_back(Fun); + } + + auto Fun = Data.Stack.pop_back_val(); + Holder.SCCOfNode[size_t(Fun)] = SCCIdx; + Data.OnStack.reset(uint32_t(Fun)); + Data.Seen.set(uint32_t(Fun)); + NodesInSCC.push_back(Fun); + } +} + +// Iterative IMplementation for Tarjan's SCC Alg. +// -> Heapoverflow through simulated Stack? +template +static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { + + auto CurrTime = Data.Time; + for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { + if (Data.Disc[size_t(Vertex)] == UINT32_MAX) { + Data.CallStack.push_back({GraphNodeId(Vertex), 0}); + while (!Data.CallStack.empty()) { + auto Curr = Data.CallStack.pop_back_val(); + // Curr.second = 0 implies that Curr.fist was not visited before + if (Curr.second == 0) { + Data.Disc[size_t(Curr.first)] = CurrTime; + Data.Low[size_t(Curr.first)] = CurrTime; + CurrTime++; + Data.Stack.push_back(Curr.first); + Data.OnStack.set(uint32_t(Curr.first)); + } + // Curr.second > 0 implies that we came back from a recursive call + if (Curr.second > 0) { + //??? 
+ setMin(Data.Low[size_t(Curr.first)], + Data.Low[size_t(Curr.second) - 1]); + } + // find the next recursive function call + while (Curr.second < Graph.getEdges(Curr.first).size() && + Data.Disc[size_t(Graph.getEdges(Curr.first)[Curr.second])]) { + GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; + if (Data.OnStack.test(uint32_t(W))) { + setMin(Data.Low[size_t(Curr.first)], Data.Disc[size_t(W)]); + } + Curr.second++; + // If a Node u is undiscovered i.e. Data.Disc[size_t(u)] = UINT32_MAX + // start a recursive function call + if (Curr.second < Graph.getEdges(Curr.first).size()) { + GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; + Data.CallStack.push_back({Curr.first, Curr.second++}); + Data.CallStack.push_back({U, 0}); + } + // If Curr.first is the root of a connected component i.e. Data.Disc = + // Data.Low + if (Data.Low[size_t(Curr.first)] == Data.Disc[size_t(Curr.first)]) { + //-> SCC found + auto SCCIdx = SCCId(Holder.NumSCCs++); + auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); + + assert(!Data.Stack.empty()); + + while (Data.Stack.back() != Curr.first) { + auto Fun = Data.Stack.pop_back_val(); + Holder.SCCOfNode[size_t(Fun)] = SCCIdx; + Data.OnStack.reset(uint32_t(Fun)); + Data.Seen.set(uint32_t(Fun)); + NodesInSCC.push_back(Fun); + } + + auto Fun = Data.Stack.pop_back_val(); + Holder.SCCOfNode[size_t(Fun)] = SCCIdx; + Data.OnStack.reset(uint32_t(Fun)); + Data.Seen.set(uint32_t(Fun)); + NodesInSCC.push_back(Fun); + } + } + } + } + } +} + +template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { + SCCHolder Ret{}; + + auto NumNodes = Graph.Adj.size(); + Ret.SCCOfNode.resize(NumNodes); + + if (!NumNodes) { + return Ret; + } + + SCCData Data(NumNodes); + for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { + if (!Data.Seen.test(FunId)) { + computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + } + } + + return Ret; +} + +// choose which Tarjan implementation will be executed +template +[[nodiscard]] SCCHolder execTarjan(const G 
&Graph, const bool Iterative) { + SCCHolder Ret{}; + + auto NumNodes = Graph.Adj.size(); + Ret.SCCOfNode.resize(NumNodes); + + if (!NumNodes) { + return Ret; + } + + SCCData Data(NumNodes); + SCCDataIt DataIt(NumNodes); + for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { + if (!Data.Seen.test(FunId)) { + if (Iterative) { + tarjanIt(Graph, DataIt, Ret); + } else { + computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + } + } + } + + return Ret; +} + +template +[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers +computeSCCCallers(const G &Graph, const SCCHolder &SCCs); + +template +auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { + SCCCallers Ret; + Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); + + llvm::SmallBitVector Roots(SCCs.NumSCCs, true); + + size_t NodeId = 0; + for (const auto &SuccNodes : Graph.Adj) { + auto SrcSCC = SCCs.SCCOfNode[NodeId]; + + for (auto SuccNode : SuccNodes) { + auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)]; + if (DestSCC != SrcSCC) { + Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC); + Roots.reset(uint32_t(DestSCC)); + } + } + + ++NodeId; + } + + Ret.SCCRoots.reserve(Roots.count()); + for (auto Rt : Roots.set_bits()) { + Ret.SCCRoots.push_back(SCCId(Rt)); + } + + return Ret; +} + +[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder +computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); + +inline auto computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers) + -> SCCOrder { + SCCOrder Ret; + Ret.SCCIds.reserve(SCCs.NumSCCs); + + llvm::SmallBitVector Seen; + Seen.resize(SCCs.NumSCCs); + + auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { + Seen.set(uint32_t(CurrSCC)); + for (auto Caller : Callers.ChildrenOfSCC[size_t(CurrSCC)]) { + if (!Seen.test(uint32_t(Caller))) { + Dfs(Dfs, Caller); + } + } + Ret.SCCIds.push_back(CurrSCC); + }; + + for (auto Leaf : Callers.SCCRoots) { + if (!Seen.test(uint32_t(Leaf))) { + Dfs(Dfs, Leaf); + } + } + + std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end()); + + 
return Ret; +} +} // namespace psr::analysis::call_graph + +namespace llvm { +template <> struct DenseMapInfo { + using SCCId = psr::analysis::call_graph::SCCId; + + static inline SCCId getEmptyKey() noexcept { return SCCId(-1); } + static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); } + static inline auto getHashValue(SCCId Id) noexcept { + return llvm::hash_value(uint32_t(Id)); + } + static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } +}; +} // namespace llvm diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp new file mode 100644 index 0000000000..0da687e2d2 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp @@ -0,0 +1,697 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/AliasSets.h" +#include "phasar/PhasarLLVM/Utils/FilteredAliasSet.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + +using namespace psr; +using namespace psr::analysis::call_graph; + +using TAGAliasHandler = llvm::function_ref; +using TAGAliasInfo = llvm::function_ref; + +static void printNodeImpl(llvm::raw_ostream &OS, Variable Var) { + OS << "var-"; + OS.write_escaped(psr::llvmIRToString(Var.Val)); +} + +static void printNodeImpl(llvm::raw_ostream &OS, Field Fld) { + OS << "fld-"; + OS.write_escaped(psr::llvmTypeToString(Fld.Base, true)); + OS << '+' << Fld.ByteOffset; +} + +static void printNodeImpl(llvm::raw_ostream &OS, Return Ret) { + OS << "ret-"; + OS.write_escaped(Ret.Fun->getName()); +} + +void analysis::call_graph::printNode(llvm::raw_ostream &OS, TAGNode TN) { + std::visit([&OS](auto Nod) 
{ printNodeImpl(OS, Nod); }, TN.Label); +} + +static llvm::SmallBitVector +getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { + /// NOTE: Copied from SiLLiS + + llvm::SmallBitVector Ret; + + auto PointerSize = DL.getPointerSize(); + // LOGS("[getPointerIndicesOfType]: " << *Ty ); + auto MaxNumPointers = + !Ty->isSized() ? 1 : DL.getTypeAllocSize(Ty) / PointerSize; + if (!MaxNumPointers) { + return Ret; + } + Ret.resize(MaxNumPointers); + + llvm::SmallVector> WorkList = {{Ty, 0}}; + + while (!WorkList.empty()) { + auto [CurrTy, CurrByteOffs] = WorkList.pop_back_val(); + + if (CurrTy->isPointerTy()) { + size_t Idx = CurrByteOffs / PointerSize; + if (CurrByteOffs % PointerSize) [[unlikely]] { + llvm::errs() << "[WARNING][getPointerIndicesOfType]: Unaligned pointer " + "found at offset " + << CurrByteOffs << " in type " << *Ty; + } + assert(Ret.size() > Idx && + "reserved unsufficient space for pointer indices"); + Ret.set(Idx); + continue; + } + + if (CurrTy->isArrayTy()) { + auto *ElemTy = CurrTy->getArrayElementType(); + auto ArrayLen = CurrTy->getArrayNumElements(); + auto ElemSize = DL.getTypeAllocSize(ElemTy); + for (size_t I = 0, Offs = CurrByteOffs; I < ArrayLen; + ++I, Offs += ElemSize) { + WorkList.emplace_back(ElemTy, Offs); + } + continue; + } + + if (auto *Struct = llvm::dyn_cast(CurrTy)) { + auto NumElems = Struct->getNumElements(); + const auto *SL = DL.getStructLayout(Struct); + for (size_t I = 0; I < NumElems; ++I) { + auto Offs = CurrByteOffs + SL->getElementOffset(I); + WorkList.emplace_back(Struct->getElementType(I), Offs); + } + continue; + } + } + + return Ret; +} + +static void addTAGNode(TAGNode TN, TypeAssignmentGraph &TAG) { + TAG.Nodes.getOrInsert(TN); +} + +static void addFields(const llvm::Module &Mod, TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto &&Structs = Mod.getIdentifiedStructTypes(); + TAG.Nodes.reserve(TAG.Nodes.size() + Structs.size()); + + size_t PointerSize = DL.getPointerSize(); + + for 
(auto *ST : Structs) { + auto Offsets = getPointerIndicesOfType(ST, DL); + for (auto Offs : Offsets.set_bits()) { + addTAGNode({Field{ST, Offs * PointerSize}}, TAG); + } + addTAGNode({Field{ST, SIZE_MAX}}, TAG); + } +} + +static void addGlobals(const llvm::Module &Mod, TypeAssignmentGraph &TAG) { + auto NumGlobals = Mod.global_size(); + TAG.Nodes.reserve(TAG.Nodes.size() + NumGlobals); + + for (const auto &Glob : Mod.globals()) { + if (Glob.getValueType()->isIntOrIntVectorTy() || + Glob.getValueType()->isFloatingPointTy()) { + continue; + } + auto GlobName = Glob.getName(); + if (GlobName.startswith("_ZTV") || GlobName.startswith("_ZTI") || + GlobName.startswith("_ZTS")) { + continue; + } + + addTAGNode({Variable{&Glob}}, TAG); + } +} + +static void initializeWithFun(const llvm::Function *Fun, + TypeAssignmentGraph &TAG) { + // Add all params + // Add all locals + // Add return + + if (Fun->isDeclaration()) + return; + + for (const auto &Arg : Fun->args()) { + if (!Arg.getType()->isPointerTy()) + continue; + + addTAGNode({Variable{&Arg}}, TAG); + } + + for (const auto &I : llvm::instructions(Fun)) { + if (!I.getType()->isPointerTy()) { + // TODO: What about SSA structs that contain pointers? 
+ continue; + } + + if (const auto *Alloca = llvm::dyn_cast(&I)) { + if (Alloca->getAllocatedType()->isIntOrIntVectorTy() || + Alloca->getAllocatedType()->isFloatingPointTy()) { + continue; + } + } + + addTAGNode({Variable{&I}}, TAG); + } + + if (Fun->getReturnType() && Fun->getReturnType()->isPointerTy()) + addTAGNode({Return{Fun}}, TAG); +} + +[[nodiscard]] static bool isVTableOrFun(const llvm::Value *Val) { + const auto *Base = Val->stripPointerCastsAndAliases(); + if (llvm::isa(Base)) + return true; + + if (const auto *Glob = llvm::dyn_cast(Base)) + return Glob->isConstant() && Glob->getName().startswith("_ZTV"); + + return false; +} + +static void handleAlloca(const llvm::AllocaInst *Alloca, + TypeAssignmentGraph &TAG, + const psr::LLVMVFTableProvider &VTP) { + auto TN = TAG.get({Variable{Alloca}}); + if (!TN) + return; + + const auto *AllocTy = + llvm::dyn_cast(Alloca->getAllocatedType()); + if (!AllocTy) + return; + + if (const auto *TV = VTP.getVFTableGlobal(AllocTy)) { + TAG.TypeEntryPoints[*TN].insert(TV); + } +} + +static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto Offs = [&]() -> size_t { + llvm::APInt Offs(64, 0); + if (GEP->accumulateConstantOffset(DL, Offs)) { + return Offs.getZExtValue(); + } + return SIZE_MAX; + }(); + + return TAG.get({Field{GEP->getSourceElementType(), Offs}}); +} + +static void handleGEP(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { + auto To = TAG.get({Variable{GEP}}); + if (!To) + return; + + if (!GEP->isInBounds()) { + auto From = TAG.get({Variable{GEP->getPointerOperand()}}); + + if (From && To) + TAG.addEdge(*From, *To); + + return; + } + // TODO: Is this correct? 
-- also check load + + auto From = getGEPNode(GEP, TAG, DL); + if (From) + TAG.addEdge(*From, *To); +} + +static bool handleEntryForStore(const llvm::StoreInst *Store, + TypeAssignmentGraph &TAG, TAGAliasInfo AI, + const llvm::DataLayout &DL) { + const auto *Base = Store->getValueOperand()->stripPointerCastsAndAliases(); + bool IsEntry = isVTableOrFun(Base); + + if (!IsEntry) + return false; + + if (const auto *GEPDest = + llvm::dyn_cast(Store->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { + TAG.TypeEntryPoints[*GEPNodeId].insert(Base); + + auto GEPNode = TAG[*GEPNodeId]; + if (const auto *FldDest = std::get_if(&GEPNode.Label)) { + auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); + + if (ApproxDest) + TAG.TypeEntryPoints[*ApproxDest].insert(Base); + } + } + } + + AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { + // TODO: Fuse store and GEP! + + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) + return; + + TAG.TypeEntryPoints[*DestNodeId].insert(Base); + }); + return true; +} + +static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, + TAGAliasInfo AI, const llvm::DataLayout &DL) { + + if (handleEntryForStore(Store, TAG, AI, DL)) + return; + + auto From = TAG.get({Variable{Store->getValueOperand()}}); + if (!From) + return; + + if (const auto *GEPDest = + llvm::dyn_cast(Store->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { + TAG.addEdge(*From, *GEPNodeId); + + auto GEPNode = TAG[*GEPNodeId]; + if (const auto *FldDest = std::get_if(&GEPNode.Label)) { + auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); + + if (ApproxDest) + TAG.addEdge(*From, *ApproxDest); + } + } + } + + AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { + // TODO: Fuse store and GEP! 
+ + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) + return; + + TAG.addEdge(*From, *DestNodeId); + }); +} + +static void handleLoad(const llvm::LoadInst *Load, TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { + auto To = TAG.get({Variable{Load}}); + if (!To) + return; + + auto From = TAG.get({Variable{Load->getPointerOperand()}}); + if (From) + TAG.addEdge(*From, *To); + + if (const auto *GEPDest = + llvm::dyn_cast(Load->getPointerOperand())) { + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) + TAG.addEdge(*GEPNodeId, *To); + } +} + +static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { + auto To = TAG.get({Variable{Phi}}); + if (!To) + return; + + for (const auto &Inc : Phi->incoming_values()) { + auto From = TAG.get({Variable{Inc.get()}}); + if (From) + TAG.addEdge(*From, *To); + } +} + +static llvm::StringRef extractTypeName(llvm::StringRef CtorName) { + // Example: _ZN3OneC2Ev + + auto EndIdx = CtorName.rfind("C2E"); + if (EndIdx == llvm::StringRef::npos) + EndIdx = CtorName.rfind("C1E"); + + if (EndIdx == llvm::StringRef::npos) + EndIdx = CtorName.size(); + + auto StartIdx = EndIdx; + while (StartIdx) { + --StartIdx; + + if (llvm::isDigit(CtorName[StartIdx])) + break; + } + return CtorName.slice(StartIdx, EndIdx); +} +static llvm::StringRef extractTypeName(std::string &&) = delete; + +static const llvm::Value *getTypeFromDI(const llvm::DICompositeType *CompTy, + const llvm::Module &Mod, + const psr::LLVMVFTableProvider &VTP) { + if (!CompTy->getIdentifier().empty()) { + + std::string Buf; + auto TypeName = CompTy->getIdentifier(); + if (TypeName.startswith("_ZTS") || TypeName.startswith("_ZTI")) { + Buf = TypeName.str(); + Buf[3] = 'V'; + TypeName = Buf; + } + + if (const auto *GlobTV = Mod.getNamedGlobal(TypeName)) { + return GlobTV; + } + if (const auto *Alias = Mod.getNamedAlias(TypeName)) { + return Alias->getAliasee()->stripPointerCastsAndAliases(); + } + + return nullptr; + } + + auto ClearName = 
CompTy->getName().str(); + const auto *Scope = CompTy->getScope(); + while (llvm::isa_and_nonnull(Scope)) { + ClearName = Scope->getName().str().append("::").append(ClearName); + Scope = Scope->getScope(); + } + + return VTP.getVFTableGlobal(ClearName); +} + +static void handleEntryForCall(const llvm::CallBase *Call, GraphNodeId CSNod, + TypeAssignmentGraph &TAG, + const llvm::Function *Callee, + const psr::LLVMVFTableProvider &VTP) { + + if (!psr::isHeapAllocatingFunction(Callee)) + return; + + if (const auto *MDNode = Call->getMetadata("heapallocsite")) { + + // Shortcut + if (const auto *CompTy = llvm::dyn_cast(MDNode); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + if (const auto *Ty = getTypeFromDI(CompTy, *Call->getModule(), VTP)) { + + TAG.TypeEntryPoints[CSNod].insert(Ty); + return; + } + } + } + // TODO: Fallback solution + + // llvm::SmallDenseSet Seen; + // llvm::SmallVector WL = {Call}; + + // // Search for the ctor call + + // const auto *CallerFun = Call->getFunction(); + + // while (!WL.empty()) { + // const auto *CurrObj = WL.pop_back_val(); + // for (const auto &Use : CurrObj->uses()) { + // const auto *User = llvm::dyn_cast(Use.getUser()); + // if (!User || User->getFunction() != CallerFun) + // continue; + + // if (const auto *Cast = llvm::dyn_cast(User); + // Cast && Cast->getDestTy()->isPointerTy()) { + // if (Seen.insert(Cast).second) + // WL.push_back(Cast); + + // continue; + // } + + // if (const auto *CtorCall = llvm::dyn_cast(User); + // CtorCall && CtorCall->getCalledFunction() && + // Use == CtorCall->getArgOperand(0)) { + // auto CtorName = CtorCall->getCalledFunction()->getName(); + // if (psr::isConstructor(CtorName)) { + // auto DemangledCtorName = llvm::demangle(CtorName.str()); + + // auto TypeName = extractTypeName(CtorName); + + // // TODO + // } + // // TODO: Extract type from ctor fun + // } + // } + // } +} + +static void handleCall(const 
llvm::CallBase *Call, TypeAssignmentGraph &TAG, + const psr::CallGraph &BaseCG, + const psr::LLVMVFTableProvider &VTP) { + + llvm::SmallVector> Args; + llvm::SmallBitVector EntryArgs; + bool HasArgNode = false; + + for (const auto &Arg : Call->args()) { + auto TN = TAG.get({Variable{Arg.get()}}); + Args.push_back(TN); + if (TN) + HasArgNode = true; + + bool IsEntry = isVTableOrFun(Arg.get()); + EntryArgs.push_back(IsEntry); + } + + auto CSNod = TAG.get({Variable{Call}}); + + // TODO: Handle struct returns that contain pointers + if (!HasArgNode && !CSNod) + return; + + for (const auto *Callee : BaseCG.getCalleesOfCallAt(Call)) { + handleEntryForCall(Call, *CSNod, TAG, Callee, VTP); + + for (const auto &[Param, Arg] : llvm::zip(Callee->args(), Args)) { + auto ParamNodId = TAG.get({Variable{&Param}}); + if (!ParamNodId) + continue; + + if (EntryArgs.test(Param.getArgNo())) { + TAG.TypeEntryPoints[*ParamNodId].insert( + Call->getArgOperand(Param.getArgNo()) + ->stripPointerCastsAndAliases()); + } + + if (!Arg) + continue; + + if (!Param.hasStructRetAttr()) + TAG.addEdge(*Arg, *ParamNodId); + + // if (!Param.hasByValAttr()) + // TAG.addEdge(*ParamNodId, *Arg); + } + if (CSNod) { + auto RetNod = TAG.get({Return{Callee}}); + if (RetNod) + TAG.addEdge(*RetNod, *CSNod); + } + } +} + +static void handleReturn(const llvm::ReturnInst *Ret, + TypeAssignmentGraph &TAG) { + + auto TNId = TAG.get({Return{Ret->getFunction()}}); + if (!TNId) + return; + + if (const auto *RetVal = Ret->getReturnValue()) { + const auto *Base = RetVal->stripPointerCastsAndAliases(); + if (isVTableOrFun(Base)) { + TAG.TypeEntryPoints[*TNId].insert(Base); + return; + } + + auto From = TAG.get({Variable{Base}}); + if (From) + TAG.addEdge(*From, *TNId); + } +} + +static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, + const psr::CallGraph &BaseCG, + TAGAliasInfo AI, const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { + if (const auto *Alloca = llvm::dyn_cast(&I)) { + 
handleAlloca(Alloca, TAG, VTP); + return; + } + if (const auto *Load = llvm::dyn_cast(&I)) { + handleLoad(Load, TAG, DL); + return; + } + if (const auto *GEP = llvm::dyn_cast(&I)) { + handleGEP(GEP, TAG, DL); + return; + } + if (const auto *Store = llvm::dyn_cast(&I)) { + handleStore(Store, TAG, AI, DL); + return; + } + if (const auto *Phi = llvm::dyn_cast(&I)) { + handlePhi(Phi, TAG); + return; + } + if (const auto *Cast = llvm::dyn_cast(&I)) { + auto From = TAG.get({Variable{Cast->getOperand(0)}}); + auto To = TAG.get({Variable{Cast}}); + + if (From && To) + TAG.addEdge(*From, *To); + } + if (const auto *Call = llvm::dyn_cast(&I)) { + handleCall(Call, TAG, BaseCG, VTP); + return; + } + if (const auto *Ret = llvm::dyn_cast(&I)) { + handleReturn(Ret, TAG); + return; + } + // TODO: Handle more cases +} + +static void buildTAGWithFun( + const llvm::Function *Fun, TypeAssignmentGraph &TAG, + const psr::CallGraph + &BaseCG, + TAGAliasInfo AI, const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { + for (const auto &I : llvm::instructions(Fun)) { + dispatch(I, TAG, BaseCG, AI, DL, VTP); + } +} + +static auto computeTypeAssignmentGraphImpl( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + TAGAliasInfo AI, const psr::LLVMVFTableProvider &VTP) + -> TypeAssignmentGraph { + TypeAssignmentGraph TAG; + + const auto &DL = Mod.getDataLayout(); + + addFields(Mod, TAG, DL); + addGlobals(Mod, TAG); + + for (const auto &Fun : Mod) { + initializeWithFun(&Fun, TAG); + } + + TAG.Adj.resize(TAG.Nodes.size()); + + for (const auto &Fun : Mod) { + buildTAGWithFun(&Fun, TAG, BaseCG, AI, DL, VTP); + } + + return TAG; +} + +auto analysis::call_graph::computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP) + -> TypeAssignmentGraph { + FilteredAliasSet FAS(AS); + return computeTypeAssignmentGraphImpl( + Mod, BaseCG, + [&FAS](const auto *Fact, const auto *At, 
TAGAliasHandler Handler) { + FAS.foreachAlias(Fact, At, Handler); + }, + VTP); +} + +auto analysis::call_graph::computeTypeAssignmentGraph( + const llvm::Module &Mod, + const psr::CallGraph + &BaseCG, + const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP) + -> TypeAssignmentGraph { + AliasInfo AI(&ObjGraph); + FilteredAliasSet FAS(AI.aliases()); + return computeTypeAssignmentGraphImpl( + Mod, BaseCG, + [&FAS](const auto *Fact, const auto *At, TAGAliasHandler Handler) { + FAS.foreachAlias(Fact, At, Handler); + }, + VTP); +} + +void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { + OS << "digraph TAG {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + + size_t Ctr = 0; + for (const auto &TN : Nodes) { + OS << " " << Ctr << "[label=\""; + printNode(OS, TN); + OS << "\"];\n"; + + ++Ctr; + } + + OS << '\n'; + + Ctr = 0; + for (const auto &Targets : Adj) { + for (auto Tgt : Targets) { + OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; + } + ++Ctr; + } +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp new file mode 100644 index 0000000000..2e677091e8 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp @@ -0,0 +1,84 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Utils/TypePropagator.h" + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" +#include "phasar/PhasarLLVM/Utils/Compressor.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" + +using namespace psr; +using namespace psr::analysis::call_graph; + +static void initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { + for (const auto &[Node, Types] : TAG.TypeEntryPoints) { + auto SCC = SCCs.SCCOfNode[size_t(Node)]; + TA.TypesPerSCC[size_t(SCC)].insert(Types.begin(), Types.end()); + } +} + +static void propagate(TypeAssignment &TA, const SCCCallers &Deps, + SCCId CurrSCC) { + const auto &Types = TA.TypesPerSCC[size_t(CurrSCC)]; + if (Types.empty()) + return; + + for (auto Succ : Deps.ChildrenOfSCC[size_t(CurrSCC)]) { + TA.TypesPerSCC[size_t(Succ)].insert(Types.begin(), Types.end()); + } +} + +TypeAssignment analysis::call_graph::propagateTypes( + const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, + const SCCCallers &Deps, const SCCOrder &Order) { + TypeAssignment Ret; + Ret.TypesPerSCC.resize(SCCs.NumSCCs); + + initialize(Ret, TAG, SCCs); + for (auto SCC : Order.SCCIds) { + propagate(Ret, Deps, SCC); + } + + return Ret; +} + +void TypeAssignment::print(llvm::raw_ostream &OS, + const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { + OS << "digraph TypeAssignment {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + + Compressor Types; + auto GetOrAddType = [&](const llvm::Value *Ty) { + auto [Id, Inserted] = Types.insert(Ty); + if (Inserted) { + OS << (size_t(Id) + SCCs.NumSCCs) << "[label=\""; + OS.write_escaped(Ty->getName()); + OS << "\"];\n"; + } + return Id + SCCs.NumSCCs; + }; + + for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { + OS << " " << Ctr << "[label=\""; + for (auto TNId : 
SCCs.NodesInSCC[Ctr]) { + auto TN = TAG.Nodes[TNId]; + printNode(OS, TN); + OS << "\\n"; + } + OS << "\"];\n"; + + for (const auto *Ty : TypesPerSCC[Ctr]) { + auto TyId = GetOrAddType(Ty); + OS << Ctr << "->" << TyId << ";\n"; + } + } +} diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp index 099592d172..b9e6bced3b 100644 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp +++ b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp @@ -7,11 +7,12 @@ * Fabian Schiebel and other *****************************************************************************/ -#include "SCCGeneric.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" + +#include "phasar/PhasarLLVM/Utils/Compressor.h" #include "llvm/ADT/SmallBitVector.h" -#include "../../../../../utils/include/phasar/Utils/Compressor.h" #include "TypeAssignmentGraph.h" #include From c38244f2c8eeb387b74b7116ef73f1bfa95919b8 Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Wed, 28 Aug 2024 18:02:25 +0200 Subject: [PATCH 04/27] SCCGeneric.h, TypeTraits.h and TypeAssignmentGraph.h updated --- .../TypeAssignmentGraph.h | 8 +- .../{Utils => ControlFlow}/TypePropagator.h | 0 .../PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 103 ++- include/phasar/PhasarLLVM/Utils/Compressor.h | 3 +- .../phasar/Utils}/SCCGeneric.h | 41 +- include/phasar/Utils/TypeTraits.h | 16 + .../DataFlow/IfdsIde/TypeAssignmentGraph.cpp | 12 +- .../DataFlow/IfdsIde/TypePropagator.cpp | 5 +- .../phasar/PhasarLLVM/ControlFlow/CallGraph.h | 304 -------- .../include/phasar/PhasarLLVM/Utils/SCC.cpp | 196 ----- .../include/phasar/PhasarLLVM/Utils/SCC.h | 71 -- .../phasar/PhasarLLVM/Utils/SCCGeneric.cpp | 197 ----- .../phasar/PhasarLLVM/Utils/SCCGeneric.h | 72 -- .../PhasarLLVM/Utils/TypeAssignmentGraph.cpp | 698 ------------------ .../PhasarLLVM/Utils/TypeAssignmentGraph.h | 150 ---- .../utils/include/phasar/Utils/Compressor.h | 212 ------ 
.../Utils/SCCGenericTest.cpp | 0 17 files changed, 106 insertions(+), 1982 deletions(-) rename include/phasar/PhasarLLVM/{Utils => ControlFlow}/TypeAssignmentGraph.h (95%) rename include/phasar/PhasarLLVM/{Utils => ControlFlow}/TypePropagator.h (100%) rename {lib/PhasarLLVM/DataFlow/IfdsIde => include/phasar/Utils}/SCCGeneric.h (90%) delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp delete mode 100644 phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h delete mode 100644 phasar/utils/include/phasar/Utils/Compressor.h rename lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp => unittests/Utils/SCCGenericTest.cpp (100%) diff --git a/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h similarity index 95% rename from include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h rename to include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h index c652eb24e9..667b23ac42 100644 --- a/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h @@ -35,7 +35,7 @@ class FilteredAliasSet; namespace psr::analysis::call_graph { -enum class [[clang::enum_extensibility(open)]] GraphNodeId : uint32_t{}; +enum class [[clang::enum_extensibility(open)]] TAGNodeId : uint32_t{}; struct Variable { const llvm::Value *Val; @@ -97,8 +97,8 @@ template <> struct DenseMapInfo { } }; -template <> struct DenseMapInfo { - using GraphNodeId = psr::analysis::call_graph::GraphNodeId; +template <> struct DenseMapInfo { + using GraphNodeId = 
psr::analysis::call_graph::TAGNodeId; inline static GraphNodeId getEmptyKey() noexcept { return GraphNodeId(-1); } inline static GraphNodeId getTombstoneKey() noexcept { return GraphNodeId(-2); @@ -117,7 +117,7 @@ namespace psr::analysis::call_graph { struct ObjectGraph; struct TypeAssignmentGraph { - + using GraphNodeId = TAGNodeId; Compressor Nodes; llvm::SmallVector, 0> Adj; diff --git a/include/phasar/PhasarLLVM/Utils/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h similarity index 100% rename from include/phasar/PhasarLLVM/Utils/TypePropagator.h rename to include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h index 69e9fa2ade..f7250a6948 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h @@ -7,13 +7,12 @@ * Fabian Schiebel and other *****************************************************************************/ -#pragma once -// error in included header Compressor.h -// #include "phasar/PhasarLLVM/Utils/Compressor.h" +// header guards hinzufügen -#include "phasar/Utils/Utilities.h" +#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" @@ -30,12 +29,12 @@ class LLVMBasedICFG; namespace psr::analysis::call_graph { // struct TypeAssignmentGraph; -enum class GraphNodeId : uint32_t; +// enum class GraphNodeId : uint32_t; enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; // holds the scc's of a given graph -struct SCCHolder { +template struct SCCHolder { llvm::SmallVector SCCOfNode{}; llvm::SmallVector> NodesInSCC{}; size_t NumSCCs = 0; @@ -43,12 +42,12 @@ struct SCCHolder { // holds a graph were the scc's are compressed to a single node. 
Resulting graph // is a DAG -struct SCCCallers { +template struct SCCCallers { llvm::SmallVector, 0> ChildrenOfSCC{}; llvm::SmallVector SCCRoots{}; - template - void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph); + void print(llvm::raw_ostream &OS, + const SCCHolder &SCCs, const G &Graph); }; // holds topologically sorted scccallers @@ -56,7 +55,7 @@ struct SCCOrder { llvm::SmallVector SCCIds; }; -struct SCCData { +template struct SCCData { llvm::SmallVector Disc; llvm::SmallVector Low; llvm::SmallBitVector OnStack; @@ -69,7 +68,7 @@ struct SCCData { Seen(NumFuns) {} }; -struct SCCDataIt { +template struct SCCDataIt { llvm::SmallVector Disc; llvm::SmallVector Low; llvm::SmallBitVector OnStack; @@ -91,8 +90,9 @@ static void setMin(uint32_t &InOut, uint32_t Other) { // TODO: Non-recursive version template -static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, - SCCHolder &Holder) { +static void computeSCCsRec(const G &Graph, typename G::GraphNodeId CurrNode, + SCCData &Data, + SCCHolder &Holder) { // See // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components @@ -142,12 +142,13 @@ static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, // Iterative IMplementation for Tarjan's SCC Alg. // -> Heapoverflow through simulated Stack? 
template -static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { +static void tarjanIt(const G &Graph, SCCDataIt &Data, + SCCHolder &Holder) { auto CurrTime = Data.Time; - for (uint32_t Vertex = 0; Vertex < Graph.Nodes.size(); Vertex++) { + for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { if (Data.Disc[size_t(Vertex)] == UINT32_MAX) { - Data.CallStack.push_back({GraphNodeId(Vertex), 0}); + Data.CallStack.push_back({G::GraphNodeId(Vertex), 0}); while (!Data.CallStack.empty()) { auto Curr = Data.CallStack.pop_back_val(); // Curr.second = 0 implies that Curr.fist was not visited before @@ -167,7 +168,7 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { // find the next recursive function call while (Curr.second < Graph.getEdges(Curr.first).size() && Data.Disc[size_t(Graph.getEdges(Curr.first)[Curr.second])]) { - GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; + typename G::GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; if (Data.OnStack.test(uint32_t(W))) { setMin(Data.Low[size_t(Curr.first)], Data.Disc[size_t(W)]); } @@ -175,7 +176,7 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { // If a Node u is undiscovered i.e. 
Data.Disc[size_t(u)] = UINT32_MAX // start a recursive function call if (Curr.second < Graph.getEdges(Curr.first).size()) { - GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; + typename G::GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; Data.CallStack.push_back({Curr.first, Curr.second++}); Data.CallStack.push_back({U, 0}); } @@ -208,10 +209,11 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { } } -template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { - SCCHolder Ret{}; +template +[[nodiscard]] SCCHolder computeSCCs(const G &Graph) { + SCCHolder Ret{}; - auto NumNodes = Graph.Nodes.size(); + auto NumNodes = Graph.Adj.size(); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { @@ -221,7 +223,7 @@ template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { SCCData Data(NumNodes); for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { if (!Data.Seen.test(FunId)) { - computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + computeSCCsRec(Graph, G::GraphNodeId(FunId), Data, Ret); } } @@ -230,10 +232,11 @@ template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { // choose which Tarjan implementation will be executed template -[[nodiscard]] SCCHolder execTarjan(const G &Graph, const bool Iterative) { - SCCHolder Ret{}; +[[nodiscard]] SCCHolder +execTarjan(const G &Graph, const bool Iterative) { + SCCHolder Ret{}; - auto NumNodes = Graph.Nodes.size(); + auto NumNodes = Graph.Adj.size(); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { @@ -241,12 +244,13 @@ template } SCCData Data(NumNodes); + SCCDataIt DataIt(NumNodes); for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { if (!Data.Seen.test(FunId)) { if (Iterative) { - TarjanIt(Graph, GraphNodeId(FunId), Data, Ret); + tarjanIt(Graph, DataIt, Ret); } else { - computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + computeSCCsRec(Graph, G::GraphNodeId(FunId), Data, Ret); } } } @@ -255,12 +259,15 @@ template } template -[[nodiscard]] LLVM_LIBRARY_VISIBILITY 
SCCCallers -computeSCCCallers(const G &Graph, const SCCHolder &SCCs); +[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers +computeSCCCallers(const G &Graph, + const SCCHolder &SCCs); template -auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { - SCCCallers Ret; +auto computeSCCCallers(const G &Graph, + const SCCHolder &SCCs) + -> SCCCallers { + SCCCallers Ret; Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); llvm::SmallBitVector Roots(SCCs.NumSCCs, true); @@ -289,36 +296,12 @@ auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { } template -void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS, - const SCCHolder &SCCs, - const G &Graph) { - OS << "digraph SCCTAG {\n"; - psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { - OS << " " << Ctr << "[label=\""; - for (auto TNId : SCCs.NodesInSCC[Ctr]) { - auto TN = Graph.Nodes[TNId]; - printNode(OS, TN); - OS << "\\n"; - } - OS << "\"];\n"; - } - - OS << '\n'; - - size_t Ctr = 0; - for (const auto &Targets : ChildrenOfSCC) { - for (auto Tgt : Targets) { - OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; - } - ++Ctr; - } -} - [[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder -computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); - -inline auto computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers) +computeSCCOrder(const SCCHolder &SCCs, + const SCCCallers &Callers); +template +inline auto computeSCCOrder(const SCCHolder &SCCs, + const SCCCallers &Callers) -> SCCOrder { SCCOrder Ret; Ret.SCCIds.reserve(SCCs.NumSCCs); diff --git a/include/phasar/PhasarLLVM/Utils/Compressor.h b/include/phasar/PhasarLLVM/Utils/Compressor.h index 4fbba84490..1218c228e7 100644 --- a/include/phasar/PhasarLLVM/Utils/Compressor.h +++ b/include/phasar/PhasarLLVM/Utils/Compressor.h @@ -7,8 +7,7 @@ * Fabian Schiebel and other *****************************************************************************/ -#pragma once - 
+// nach phasar utils #include "phasar/Utils/ByRef.h" #include "phasar/Utils/TypeTraits.h" diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h similarity index 90% rename from lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h rename to include/phasar/Utils/SCCGeneric.h index c4248e3e7c..197acf15e5 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -11,7 +11,7 @@ // error in included header Compressor.h // #include "phasar/PhasarLLVM/Utils/Compressor.h" -// #include "phasar/Utils/Utilities.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/SmallBitVector.h" @@ -139,13 +139,13 @@ static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, } } -// Iterative IMplementation for Tarjan's SCC Alg. +// Iterative Implementation for Tarjan's SCC Alg. // -> Heapoverflow through simulated Stack? template static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { auto CurrTime = Data.Time; - for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { + for (uint32_t Vertex = 0; Vertex < Graph.Nodes.size(); Vertex++) { if (Data.Disc[size_t(Vertex)] == UINT32_MAX) { Data.CallStack.push_back({GraphNodeId(Vertex), 0}); while (!Data.CallStack.empty()) { @@ -160,7 +160,6 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { } // Curr.second > 0 implies that we came back from a recursive call if (Curr.second > 0) { - //??? 
setMin(Data.Low[size_t(Curr.first)], Data.Low[size_t(Curr.second) - 1]); } @@ -211,7 +210,7 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { SCCHolder Ret{}; - auto NumNodes = Graph.Adj.size(); + auto NumNodes = Graph.Nodes.size(); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { @@ -233,7 +232,7 @@ template [[nodiscard]] SCCHolder execTarjan(const G &Graph, const bool Iterative) { SCCHolder Ret{}; - auto NumNodes = Graph.Adj.size(); + auto NumNodes = Graph.Nodes.size(); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { @@ -241,11 +240,10 @@ template } SCCData Data(NumNodes); - SCCDataIt DataIt(NumNodes); for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { if (!Data.Seen.test(FunId)) { if (Iterative) { - tarjanIt(Graph, DataIt, Ret); + TarjanIt(Graph, GraphNodeId(FunId), Data, Ret); } else { computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); } @@ -289,6 +287,33 @@ auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { return Ret; } +template +void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS, + const SCCHolder &SCCs, + const G &Graph) { + OS << "digraph SCCTAG {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { + OS << " " << Ctr << "[label=\""; + for (auto TNId : SCCs.NodesInSCC[Ctr]) { + auto TN = Graph.Nodes[TNId]; + printNode(OS, TN); + OS << "\\n"; + } + OS << "\"];\n"; + } + + OS << '\n'; + + size_t Ctr = 0; + for (const auto &Targets : ChildrenOfSCC) { + for (auto Tgt : Targets) { + OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; + } + ++Ctr; + } +} + [[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 2c6d771c75..4f10c3b9bb 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h 
@@ -172,6 +172,18 @@ struct variant_idx, T> size_t, std::variant...>(type_identity{}).index()> {}; +template +struct has_llvm_dense_map_info : std::false_type {}; +template +struct has_llvm_dense_map_info< + T, std::void_t::getEmptyKey()), + decltype(llvm::DenseMapInfo::getTombstoneKey()), + decltype(llvm::DenseMapInfo::getHashValue( + std::declval())), + decltype(llvm::DenseMapInfo::isEqual(std::declval(), + std::declval()))>> + : std::true_type {}; + } // namespace detail template @@ -246,6 +258,10 @@ template using type_identity_t = typename type_identity::type; template static constexpr size_t variant_idx = detail::variant_idx::value; +template +static constexpr bool has_llvm_dense_map_info = + detail::has_llvm_dense_map_info::value; + struct TrueFn { template [[nodiscard]] bool operator()(const Args &.../*unused*/) const noexcept { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp index 0da687e2d2..a8a30b1d38 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp @@ -7,7 +7,7 @@ * Fabian Schiebel and other *****************************************************************************/ -#include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" @@ -231,9 +231,9 @@ static void handleAlloca(const llvm::AllocaInst *Alloca, } } -static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, - TypeAssignmentGraph &TAG, - const llvm::DataLayout &DL) { +static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, + TypeAssignmentGraph &TAG, + const llvm::DataLayout &DL) { auto Offs = [&]() -> size_t { llvm::APInt Offs(64, 0); if (GEP->accumulateConstantOffset(DL, Offs)) { @@ -422,7 +422,7 @@ static const llvm::Value 
*getTypeFromDI(const llvm::DICompositeType *CompTy, return VTP.getVFTableGlobal(ClearName); } -static void handleEntryForCall(const llvm::CallBase *Call, GraphNodeId CSNod, +static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, TypeAssignmentGraph &TAG, const llvm::Function *Callee, const psr::LLVMVFTableProvider &VTP) { @@ -490,7 +490,7 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, const llvm::Function *> &BaseCG, const psr::LLVMVFTableProvider &VTP) { - llvm::SmallVector> Args; + llvm::SmallVector> Args; llvm::SmallBitVector EntryArgs; bool HasArgNode = false; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp index 2e677091e8..ffa934bedb 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp @@ -7,8 +7,9 @@ * Fabian Schiebel and other *****************************************************************************/ -#include "phasar/PhasarLLVM/Utils/TypePropagator.h" +#include "phasar/PhasarLLVM/ControlFlow/TypePropagator.h" +#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" #include "phasar/PhasarLLVM/Utils/Compressor.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" @@ -18,7 +19,7 @@ using namespace psr; using namespace psr::analysis::call_graph; static void initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { + const SCCHolder &SCCs) { for (const auto &[Node, Types] : TAG.TypeEntryPoints) { auto SCC = SCCs.SCCOfNode[size_t(Node)]; TA.TypesPerSCC[size_t(SCC)].insert(Types.begin(), Types.end()); diff --git a/phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h b/phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h deleted file mode 100644 index 590b16964e..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/ControlFlow/CallGraph.h +++ /dev/null @@ 
-1,304 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and others - *****************************************************************************/ - -#ifndef PHASAR_CONTROLFLOW_CALLGRAPH_H -#define PHASAR_CONTROLFLOW_CALLGRAPH_H - -#include "phasar/ControlFlow/CallGraphBase.h" -#include "phasar/Utils/ByRef.h" -#include "phasar/Utils/Logger.h" -#include "phasar/Utils/StableVector.h" -#include "phasar/Utils/Utilities.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" - -#include "nlohmann/json.hpp" - -#include -#include -#include - -namespace psr { -template class CallGraphBuilder; -template class CallGraph; - -template struct CGTraits> { - using n_t = N; - using f_t = F; -}; - -/// An explicit graph-representation of a call-graph. Only represents the data, -/// not the call-graph analysis that creates it. -/// -/// This type is immutable. To incrementally build it from your call-graph -/// analysis, use the CallGraphBuilder -template -class CallGraph : public CallGraphBase> { - using base_t = CallGraphBase>; - friend base_t; - friend class CallGraphBuilder; - -public: - using typename base_t::f_t; - using typename base_t::n_t; - using FunctionVertexTy = llvm::SmallVector; - using InstructionVertexTy = llvm::SmallVector; - - /// Creates a new, empty call-graph - CallGraph() noexcept = default; - - /// Deserializes a previously computed call-graph - template - [[nodiscard]] static CallGraph - deserialize(const nlohmann::json &PrecomputedCG, - FunctionGetter GetFunctionFromName, - InstructionGetter GetInstructionFromId); - - /// A range of all functions that are vertices in the call-graph. The number - /// of vertex functions can be retrieved by getNumVertexFunctions(). 
- [[nodiscard]] auto getAllVertexFunctions() const noexcept { - return llvm::make_first_range(CallersOf); - } - - /// A range of all call-sites that are vertices in the call-graph. The number - /// of vertex-callsites can be retrived by getNumVertexCallSites(). - [[nodiscard]] auto getAllVertexCallSites() const noexcept { - return llvm::make_first_range(CalleesAt); - } - - [[nodiscard]] size_t getNumVertexFunctions() const noexcept { - return CallersOf.size(); - } - [[nodiscard]] size_t getNumVertexCallSites() const noexcept { - return CalleesAt.size(); - } - - /// The number of functions within this call-graph - [[nodiscard]] size_t size() const noexcept { return getNumVertexFunctions(); } - - [[nodiscard]] bool empty() const noexcept { return CallersOf.empty(); } - - /// Creates a JSON representation of this call-graph suitable for presistent - /// storage. - /// Use the ctor taking a json object for deserialization - template - [[nodiscard]] nlohmann::json getAsJson(FunctionIdGetter GetFunctionId, - InstIdGetter GetInstructionId) const { - nlohmann::json J; - - for (const auto &[Fun, Callers] : CallersOf) { - auto &JCallers = J[std::invoke(GetFunctionId, Fun)]; - - for (const auto &CS : *Callers) { - JCallers.push_back(std::invoke(GetInstructionId, CS)); - } - } - - return J; - } - - template - void printAsDot(llvm::raw_ostream &OS, FunctionLabelGetter GetFunctionLabel, - InstParentGetter GetFunctionFromInst, - InstLabelGetter GetInstLabel) const { - OS << "digraph CallGraph{\n"; - scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - - llvm::DenseMap Fun2Id; - Fun2Id.reserve(CallersOf.size()); - - size_t CurrId = 0; - for (const auto &Fun : getAllVertexFunctions()) { - OS << CurrId << "[label=\""; - OS.write_escaped(std::invoke(GetFunctionLabel, Fun)) << "\"];\n"; - Fun2Id[Fun] = CurrId++; - } - - for (const auto &[CS, Callees] : CalleesAt) { - const auto &Fun = std::invoke(GetFunctionFromInst, CS); - - for (const auto &Succ : *Callees) { - OS << Fun2Id.lookup(Fun) 
<< "->" << Fun2Id.lookup(Succ) << "[label=\""; - OS.write_escaped(std::invoke(GetInstLabel, CS)) << "\"];\n"; - } - } - } - -private: - [[nodiscard]] llvm::ArrayRef - getCalleesOfCallAtImpl(ByConstRef Inst) const noexcept { - if (const auto *CalleesPtr = CalleesAt.lookup(Inst)) { - return *CalleesPtr; - } - return {}; - } - - [[nodiscard]] llvm::ArrayRef - getCallersOfImpl(ByConstRef Fun) const noexcept { - if (const auto *CallersPtr = CallersOf.lookup(Fun)) { - return *CallersPtr; - } - return {}; - } - - // --- - - StableVector InstVertexOwner; - std::vector FunVertexOwner; - - llvm::DenseMap CalleesAt{}; - llvm::DenseMap CallersOf{}; -}; - -/// A mutable wrapper over a CallGraph. Use this to build a call-graph from -/// within your call-graph ananlysis. -template class CallGraphBuilder { -public: - using n_t = typename CallGraph::n_t; - using f_t = typename CallGraph::f_t; - using FunctionVertexTy = typename CallGraph::FunctionVertexTy; - using InstructionVertexTy = typename CallGraph::InstructionVertexTy; - - void reserve(size_t MaxNumFunctions) { - CG.FunVertexOwner.reserve(MaxNumFunctions); - CG.CalleesAt.reserve(MaxNumFunctions); - CG.CallersOf.reserve(MaxNumFunctions); - } - - /// Registeres a new function in the call-graph. Returns a list of all - /// call-sites that are known so far to potentially call this function. - /// Do not manually add elements to this vector -- use addCallEdge instead. - [[nodiscard]] FunctionVertexTy *addFunctionVertex(f_t Fun) { - auto [It, Inserted] = CG.CallersOf.try_emplace(std::move(Fun), nullptr); - if (Inserted) { - auto Cap = CG.FunVertexOwner.capacity(); - assert(CG.FunVertexOwner.size() < Cap && - "Trying to add more than MaxNumFunctions Function Vertices"); - It->second = &CG.FunVertexOwner.emplace_back(); - } - return It->second; - } - - /// Registeres a new call-site in the call-graph. Returns a list of all - /// callee functions that are known so far to potentially be called by this - /// function. 
- /// Do not manually add elements to this vector -- use addCallEdge instead. - [[nodiscard]] InstructionVertexTy *addInstructionVertex(n_t Inst) { - auto [It, Inserted] = CG.CalleesAt.try_emplace(std::move(Inst), nullptr); - if (Inserted) { - It->second = &CG.InstVertexOwner.emplace_back(); - } - return It->second; - } - - /// Tries to lookup the InstructionVertex for the given call-site. Returns - /// nullptr on failure. - [[nodiscard]] InstructionVertexTy * - getInstVertexOrNull(ByConstRef Inst) const noexcept { - return CG.CalleesAt.lookup(Inst); - } - - /// Adds a new directional edge to the call-graph indicating that CS may call - /// Callee - void addCallEdge(n_t CS, f_t Callee) { - auto IVtx = addInstructionVertex(CS); - auto FVtx = addFunctionVertex(Callee); - addCallEdge(std::move(CS), IVtx, std::move(Callee), FVtx); - } - - /// Same as addCallEdge(n_t, f_t), but uses an already known - /// InstructionVertexTy to save a lookup - void addCallEdge(n_t CS, InstructionVertexTy *Callees, f_t Callee) { - auto *Callers = addFunctionVertex(Callee); - addCallEdge(std::move(CS), Callees, std::move(Callee), Callers); - } - - /// Same as addCallEdge(n_t, f_t), but uses an already known - /// FunctionVertexTy to save a lookup - void addCallEdge(n_t CS, f_t Callee, FunctionVertexTy *Callers) { - auto *Callees = addInstructionVertex(CS); - addCallEdge(std::move(CS), Callees, std::move(Callee), Callers); - } - - /// Moves the completely built call-graph out of this builder for further - /// use. Do not use the builder after it anymore. 
- [[nodiscard]] CallGraph consumeCallGraph() noexcept { - return std::move(CG); - } - - /// Returns a view on the current (partial) call-graph that has already been - /// constructed - [[nodiscard]] const CallGraph &viewCallGraph() const noexcept { - return CG; - } - -private: - void addCallEdge(n_t CS, InstructionVertexTy *Callees, f_t Callee, - FunctionVertexTy *Callers) { - Callees->push_back(std::move(Callee)); - Callers->push_back(std::move(CS)); - } - - CallGraph CG{}; -}; - -template -template -[[nodiscard]] CallGraph -CallGraph::deserialize(const nlohmann::json &PrecomputedCG, - FunctionGetter GetFunctionFromName, - InstructionGetter GetInstructionFromId) { - if (!PrecomputedCG.is_object()) { - PHASAR_LOG_LEVEL_CAT(ERROR, "CallGraph", "Invalid Json. Expected object"); - return {}; - } - - CallGraphBuilder CGBuilder; - CGBuilder.reserve(PrecomputedCG.size()); - - for (const auto &[FunName, CallerIDs] : PrecomputedCG.items()) { - const auto &Fun = std::invoke(GetFunctionFromName, FunName); - if (!Fun) { - PHASAR_LOG_LEVEL_CAT(WARNING, "CallGraph", - "Invalid function name: " << FunName); - continue; - } - - auto *CEdges = CGBuilder.addFunctionVertex(Fun); - CEdges->reserve(CallerIDs.size()); - - for (const auto &JId : CallerIDs) { - auto Id = JId.get(); - const auto &CS = std::invoke(GetInstructionFromId, Id); - if (!CS) { - PHASAR_LOG_LEVEL_CAT(WARNING, "CallGraph", - "Invalid CAll-Instruction Id: " << Id); - } - - CGBuilder.addCallEdge(CS, Fun); - } - } - return CGBuilder.consumeCallGraph(); -} -} // namespace psr - -namespace llvm { -class Function; -class Instruction; -} // namespace llvm - -extern template class psr::CallGraph; -extern template class psr::CallGraphBuilder; - -#endif // PHASAR_CONTROLFLOW_CALLGRAPH_H diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp deleted file mode 100644 index a210b91f35..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.cpp +++ 
/dev/null @@ -1,196 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#include "SCC.h" - -#include "llvm/ADT/SmallBitVector.h" - -#include "../../../../../utils/include/phasar/Utils/Compressor.h" -#include "TypeAssignmentGraph.h" - -#include -#include -#include - -using namespace psr; - -using SCCId = analysis::call_graph::SCCId; - -struct SCCData { - llvm::SmallVector Disc; - llvm::SmallVector Low; - llvm::SmallBitVector OnStack; - llvm::SmallVector Stack; - uint32_t Time = 0; - llvm::SmallBitVector Seen; - - explicit SCCData(size_t NumFuns) - : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), - Seen(NumFuns) {} -}; - -static void setMin(uint32_t &InOut, uint32_t Other) { - if (Other < InOut) - InOut = Other; -} - -// TODO: Non-recursive version -static void computeSCCsRec(const analysis::call_graph::TypeAssignmentGraph &TAG, - analysis::call_graph::TAGNodeId CurrNode, - SCCData &Data, - psr::analysis::call_graph::SCCHolder &Holder) { - // See - // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components - - auto CurrTime = Data.Time++; - Data.Disc[size_t(CurrNode)] = CurrTime; - Data.Low[size_t(CurrNode)] = CurrTime; - Data.Stack.push_back(CurrNode); - Data.OnStack.set(uint32_t(CurrNode)); - - for (auto SuccNode : TAG.Adj[size_t(CurrNode)]) { - if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) { - // Tree-edge: Not seen yet --> recurse - - computeSCCsRec(TAG, SuccNode, Data, Holder); - setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]); - } else if (Data.OnStack.test(uint32_t(SuccNode))) { - // Back-edge --> circle! 
- - setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]); - } - } - - if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) { - // Found SCC - - auto SCCIdx = SCCId(Holder.NumSCCs++); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != CurrNode) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } - - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } -} - -auto analysis::call_graph::computeSCCs(const TypeAssignmentGraph &TAG) - -> SCCHolder { - SCCHolder Ret{}; - - auto NumNodes = TAG.Nodes.size(); - Ret.SCCOfNode.resize(NumNodes); - - if (!NumNodes) - return Ret; - - SCCData Data(NumNodes); - for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (!Data.Seen.test(FunId)) - computeSCCsRec(TAG, TAGNodeId(FunId), Data, Ret); - } - - return Ret; -} - -auto analysis::call_graph::computeSCCCallers(const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) - -> SCCCallers { - SCCCallers Ret; - Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); - - llvm::SmallBitVector Roots(SCCs.NumSCCs, true); - - size_t NodeId = 0; - for (const auto &SuccNodes : TAG.Adj) { - auto SrcSCC = SCCs.SCCOfNode[NodeId]; - - for (auto SuccNode : SuccNodes) { - auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)]; - if (DestSCC != SrcSCC) { - Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC); - Roots.reset(uint32_t(DestSCC)); - } - } - - ++NodeId; - } - - Ret.SCCRoots.reserve(Roots.count()); - for (auto Rt : Roots.set_bits()) { - Ret.SCCRoots.push_back(SCCId(Rt)); - } - - return Ret; -} - -void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS, - const SCCHolder &SCCs, - const TypeAssignmentGraph &TAG) { - OS << "digraph SCCTAG {\n"; - 
psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { - OS << " " << Ctr << "[label=\""; - for (auto TNId : SCCs.NodesInSCC[Ctr]) { - auto TN = TAG.Nodes[TNId]; - printNode(OS, TN); - OS << "\\n"; - } - OS << "\"];\n"; - } - - OS << '\n'; - - size_t Ctr = 0; - for (const auto &Targets : ChildrenOfSCC) { - for (auto Tgt : Targets) { - OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; - } - ++Ctr; - } -} - -auto analysis::call_graph::computeSCCOrder(const SCCHolder &SCCs, - const SCCCallers &Callers) - -> SCCOrder { - SCCOrder Ret; - Ret.SCCIds.reserve(SCCs.NumSCCs); - - llvm::SmallBitVector Seen; - Seen.resize(SCCs.NumSCCs); - - auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { - Seen.set(uint32_t(CurrSCC)); - for (auto Caller : Callers.ChildrenOfSCC[size_t(CurrSCC)]) { - if (!Seen.test(uint32_t(Caller))) - Dfs(Dfs, Caller); - } - Ret.SCCIds.push_back(CurrSCC); - }; - - for (auto Leaf : Callers.SCCRoots) { - if (!Seen.test(uint32_t(Leaf))) - Dfs(Dfs, Leaf); - } - - std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end()); - - return Ret; -} diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h deleted file mode 100644 index 3643011cb0..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCC.h +++ /dev/null @@ -1,71 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. 
- * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#pragma once - -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/HashBuilder.h" -#include "llvm/Support/raw_ostream.h" - -#include "../../../../../utils/include/phasar/Utils/Compressor.h" - -namespace psr { -class LLVMBasedICFG; -} // namespace psr - -namespace psr::analysis::call_graph { -struct TypeAssignmentGraph; -enum class TAGNodeId : uint32_t; - -enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; - -struct SCCHolder { - llvm::SmallVector SCCOfNode{}; - llvm::SmallVector> NodesInSCC{}; - size_t NumSCCs = 0; -}; - -struct SCCCallers { - llvm::SmallVector, 0> ChildrenOfSCC{}; - llvm::SmallVector SCCRoots{}; - - void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, - const TypeAssignmentGraph &TAG); -}; - -struct SCCOrder { - llvm::SmallVector SCCIds; -}; - -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCHolder -computeSCCs(const TypeAssignmentGraph &TAG); - -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers -computeSCCCallers(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs); - -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder -computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); -} // namespace psr::analysis::call_graph - -namespace llvm { -template <> struct DenseMapInfo { - using SCCId = psr::analysis::call_graph::SCCId; - - static inline SCCId getEmptyKey() noexcept { return SCCId(-1); } - static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); } - static inline auto getHashValue(SCCId Id) noexcept { - return llvm::hash_value(uint32_t(Id)); - } - static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } -}; -} // namespace llvm diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp 
b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp deleted file mode 100644 index b9e6bced3b..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" - -#include "phasar/PhasarLLVM/Utils/Compressor.h" - -#include "llvm/ADT/SmallBitVector.h" - -#include "TypeAssignmentGraph.h" - -#include -#include -#include - -using namespace psr; - -using SCCId = analysis::call_graph::SCCId; - -struct SCCData { - llvm::SmallVector Disc; - llvm::SmallVector Low; - llvm::SmallBitVector OnStack; - llvm::SmallVector Stack; - uint32_t Time = 0; - llvm::SmallBitVector Seen; - - explicit SCCData(size_t NumFuns) - : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), - Seen(NumFuns) {} -}; - -static void setMin(uint32_t &InOut, uint32_t Other) { - if (Other < InOut) - InOut = Other; -} - -// TODO: Non-recursive version -static void computeSCCsRec(const analysis::call_graph::TypeAssignmentGraph &TAG, - analysis::call_graph::TAGNodeId CurrNode, - SCCData &Data, - psr::analysis::call_graph::SCCHolder &Holder) { - // See - // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components - - auto CurrTime = Data.Time++; - Data.Disc[size_t(CurrNode)] = CurrTime; - Data.Low[size_t(CurrNode)] = CurrTime; - Data.Stack.push_back(CurrNode); - Data.OnStack.set(uint32_t(CurrNode)); - - for (auto SuccNode : TAG.Adj[size_t(CurrNode)]) { - if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) { - // Tree-edge: Not seen yet --> recurse - - computeSCCsRec(TAG, SuccNode, Data, 
Holder); - setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]); - } else if (Data.OnStack.test(uint32_t(SuccNode))) { - // Back-edge --> circle! - - setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]); - } - } - - if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) { - // Found SCC - - auto SCCIdx = SCCId(Holder.NumSCCs++); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != CurrNode) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } - - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } -} - -auto analysis::call_graph::computeSCCs(const TypeAssignmentGraph &TAG) - -> SCCHolder { - SCCHolder Ret{}; - - auto NumNodes = TAG.Nodes.size(); - Ret.SCCOfNode.resize(NumNodes); - - if (!NumNodes) - return Ret; - - SCCData Data(NumNodes); - for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (!Data.Seen.test(FunId)) - computeSCCsRec(TAG, TAGNodeId(FunId), Data, Ret); - } - - return Ret; -} - -auto analysis::call_graph::computeSCCCallers(const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) - -> SCCCallers { - SCCCallers Ret; - Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); - - llvm::SmallBitVector Roots(SCCs.NumSCCs, true); - - size_t NodeId = 0; - for (const auto &SuccNodes : TAG.Adj) { - auto SrcSCC = SCCs.SCCOfNode[NodeId]; - - for (auto SuccNode : SuccNodes) { - auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)]; - if (DestSCC != SrcSCC) { - Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC); - Roots.reset(uint32_t(DestSCC)); - } - } - - ++NodeId; - } - - Ret.SCCRoots.reserve(Roots.count()); - for (auto Rt : Roots.set_bits()) { - Ret.SCCRoots.push_back(SCCId(Rt)); - } - - return Ret; -} - -void 
analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS, - const SCCHolder &SCCs, - const TypeAssignmentGraph &TAG) { - OS << "digraph SCCTAG {\n"; - psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { - OS << " " << Ctr << "[label=\""; - for (auto TNId : SCCs.NodesInSCC[Ctr]) { - auto TN = TAG.Nodes[TNId]; - printNode(OS, TN); - OS << "\\n"; - } - OS << "\"];\n"; - } - - OS << '\n'; - - size_t Ctr = 0; - for (const auto &Targets : ChildrenOfSCC) { - for (auto Tgt : Targets) { - OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; - } - ++Ctr; - } -} - -auto analysis::call_graph::computeSCCOrder(const SCCHolder &SCCs, - const SCCCallers &Callers) - -> SCCOrder { - SCCOrder Ret; - Ret.SCCIds.reserve(SCCs.NumSCCs); - - llvm::SmallBitVector Seen; - Seen.resize(SCCs.NumSCCs); - - auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { - Seen.set(uint32_t(CurrSCC)); - for (auto Caller : Callers.ChildrenOfSCC[size_t(CurrSCC)]) { - if (!Seen.test(uint32_t(Caller))) - Dfs(Dfs, Caller); - } - Ret.SCCIds.push_back(CurrSCC); - }; - - for (auto Leaf : Callers.SCCRoots) { - if (!Seen.test(uint32_t(Leaf))) - Dfs(Dfs, Leaf); - } - - std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end()); - - return Ret; -} diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h b/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h deleted file mode 100644 index 179daaa658..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/SCCGeneric.h +++ /dev/null @@ -1,72 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. 
- * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#pragma once - -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/HashBuilder.h" -#include "llvm/Support/raw_ostream.h" - -#include "../../../../../utils/include/phasar/Utils/Compressor.h" - -namespace psr { -class LLVMBasedICFG; -} // namespace psr - -namespace psr::analysis::call_graph { -// struct TypeAssignmentGraph; -enum class GraphNodeId : uint32_t; - -enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; - -struct SCCHolder { - llvm::SmallVector SCCOfNode{}; - llvm::SmallVector> NodesInSCC{}; - size_t NumSCCs = 0; -}; - -struct SCCCallers { - llvm::SmallVector, 0> ChildrenOfSCC{}; - llvm::SmallVector SCCRoots{}; - - template - void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph); -}; - -struct SCCOrder { - llvm::SmallVector SCCIds; -}; - -template -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCHolder computeSCCs(const G &Graph); - -template -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers -computeSCCCallers(const G &Graph, const SCCHolder &SCCs); - -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder -computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); -} // namespace psr::analysis::call_graph - -namespace llvm { -template <> struct DenseMapInfo { - using SCCId = psr::analysis::call_graph::SCCId; - - static inline SCCId getEmptyKey() noexcept { return SCCId(-1); } - static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); } - static inline auto getHashValue(SCCId Id) noexcept { - return llvm::hash_value(uint32_t(Id)); - } - static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } -}; -} // namespace llvm diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp 
b/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp deleted file mode 100644 index af2447b372..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.cpp +++ /dev/null @@ -1,698 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -// #include "TypeAssignmentGraph.h" - -#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" -#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" -#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/Utilities.h" - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/STLFunctionalExtras.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/Demangle/Demangle.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/raw_ostream.h" - -#include "t2/analysis/FilteredAliasSet.h" -#include "t2/analysis/call_graph/AliasSets.h" - -#include -#include -#include -#include - -using namespace psr; -using namespace psr::analysis::call_graph; - -using TAGAliasHandler = llvm::function_ref; -using TAGAliasInfo = llvm::function_ref; - -static void printNodeImpl(llvm::raw_ostream &OS, Variable Var) { - OS << "var-"; - 
OS.write_escaped(psr::llvmIRToString(Var.Val)); -} - -static void printNodeImpl(llvm::raw_ostream &OS, Field Fld) { - OS << "fld-"; - OS.write_escaped(psr::llvmTypeToString(Fld.Base, true)); - OS << '+' << Fld.ByteOffset; -} - -static void printNodeImpl(llvm::raw_ostream &OS, Return Ret) { - OS << "ret-"; - OS.write_escaped(Ret.Fun->getName()); -} - -void analysis::call_graph::printNode(llvm::raw_ostream &OS, TAGNode TN) { - std::visit([&OS](auto Nod) { printNodeImpl(OS, Nod); }, TN.Label); -} - -static llvm::SmallBitVector -getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { - /// NOTE: Copied from SiLLiS - - llvm::SmallBitVector Ret; - - auto PointerSize = DL.getPointerSize(); - // LOGS("[getPointerIndicesOfType]: " << *Ty ); - auto MaxNumPointers = - !Ty->isSized() ? 1 : DL.getTypeAllocSize(Ty) / PointerSize; - if (!MaxNumPointers) { - return Ret; - } - Ret.resize(MaxNumPointers); - - llvm::SmallVector> WorkList = {{Ty, 0}}; - - while (!WorkList.empty()) { - auto [CurrTy, CurrByteOffs] = WorkList.pop_back_val(); - - if (CurrTy->isPointerTy()) { - size_t Idx = CurrByteOffs / PointerSize; - if (CurrByteOffs % PointerSize) [[unlikely]] { - llvm::errs() << "[WARNING][getPointerIndicesOfType]: Unaligned pointer " - "found at offset " - << CurrByteOffs << " in type " << *Ty; - } - assert(Ret.size() > Idx && - "reserved unsufficient space for pointer indices"); - Ret.set(Idx); - continue; - } - - if (CurrTy->isArrayTy()) { - auto *ElemTy = CurrTy->getArrayElementType(); - auto ArrayLen = CurrTy->getArrayNumElements(); - auto ElemSize = DL.getTypeAllocSize(ElemTy); - for (size_t I = 0, Offs = CurrByteOffs; I < ArrayLen; - ++I, Offs += ElemSize) { - WorkList.emplace_back(ElemTy, Offs); - } - continue; - } - - if (auto *Struct = llvm::dyn_cast(CurrTy)) { - auto NumElems = Struct->getNumElements(); - const auto *SL = DL.getStructLayout(Struct); - for (size_t I = 0; I < NumElems; ++I) { - auto Offs = CurrByteOffs + SL->getElementOffset(I); - 
WorkList.emplace_back(Struct->getElementType(I), Offs); - } - continue; - } - } - - return Ret; -} - -static void addTAGNode(TAGNode TN, TypeAssignmentGraph &TAG) { - TAG.Nodes.getOrInsert(TN); -} - -static void addFields(const llvm::Module &Mod, TypeAssignmentGraph &TAG, - const llvm::DataLayout &DL) { - auto &&Structs = Mod.getIdentifiedStructTypes(); - TAG.Nodes.reserve(TAG.Nodes.size() + Structs.size()); - - size_t PointerSize = DL.getPointerSize(); - - for (auto *ST : Structs) { - auto Offsets = getPointerIndicesOfType(ST, DL); - for (auto Offs : Offsets.set_bits()) { - addTAGNode({Field{ST, Offs * PointerSize}}, TAG); - } - addTAGNode({Field{ST, SIZE_MAX}}, TAG); - } -} - -static void addGlobals(const llvm::Module &Mod, TypeAssignmentGraph &TAG) { - auto NumGlobals = Mod.global_size(); - TAG.Nodes.reserve(TAG.Nodes.size() + NumGlobals); - - for (const auto &Glob : Mod.globals()) { - if (Glob.getValueType()->isIntOrIntVectorTy() || - Glob.getValueType()->isFloatingPointTy()) { - continue; - } - auto GlobName = Glob.getName(); - if (GlobName.startswith("_ZTV") || GlobName.startswith("_ZTI") || - GlobName.startswith("_ZTS")) { - continue; - } - - addTAGNode({Variable{&Glob}}, TAG); - } -} - -static void initializeWithFun(const llvm::Function *Fun, - TypeAssignmentGraph &TAG) { - // Add all params - // Add all locals - // Add return - - if (Fun->isDeclaration()) - return; - - for (const auto &Arg : Fun->args()) { - if (!Arg.getType()->isPointerTy()) - continue; - - addTAGNode({Variable{&Arg}}, TAG); - } - - for (const auto &I : llvm::instructions(Fun)) { - if (!I.getType()->isPointerTy()) { - // TODO: What about SSA structs that contain pointers? 
- continue; - } - - if (const auto *Alloca = llvm::dyn_cast(&I)) { - if (Alloca->getAllocatedType()->isIntOrIntVectorTy() || - Alloca->getAllocatedType()->isFloatingPointTy()) { - continue; - } - } - - addTAGNode({Variable{&I}}, TAG); - } - - if (Fun->getReturnType() && Fun->getReturnType()->isPointerTy()) - addTAGNode({Return{Fun}}, TAG); -} - -[[nodiscard]] static bool isVTableOrFun(const llvm::Value *Val) { - const auto *Base = Val->stripPointerCastsAndAliases(); - if (llvm::isa(Base)) - return true; - - if (const auto *Glob = llvm::dyn_cast(Base)) - return Glob->isConstant() && Glob->getName().startswith("_ZTV"); - - return false; -} - -static void handleAlloca(const llvm::AllocaInst *Alloca, - TypeAssignmentGraph &TAG, - const psr::LLVMVFTableProvider &VTP) { - auto TN = TAG.get({Variable{Alloca}}); - if (!TN) - return; - - const auto *AllocTy = - llvm::dyn_cast(Alloca->getAllocatedType()); - if (!AllocTy) - return; - - if (const auto *TV = VTP.getVFTableGlobal(AllocTy)) { - TAG.TypeEntryPoints[*TN].insert(TV); - } -} - -static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, - TypeAssignmentGraph &TAG, - const llvm::DataLayout &DL) { - auto Offs = [&]() -> size_t { - llvm::APInt Offs(64, 0); - if (GEP->accumulateConstantOffset(DL, Offs)) { - return Offs.getZExtValue(); - } - return SIZE_MAX; - }(); - - return TAG.get({Field{GEP->getSourceElementType(), Offs}}); -} - -static void handleGEP(const llvm::GetElementPtrInst *GEP, - TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { - auto To = TAG.get({Variable{GEP}}); - if (!To) - return; - - if (!GEP->isInBounds()) { - auto From = TAG.get({Variable{GEP->getPointerOperand()}}); - - if (From && To) - TAG.addEdge(*From, *To); - - return; - } - // TODO: Is this correct? 
-- also check load - - auto From = getGEPNode(GEP, TAG, DL); - if (From) - TAG.addEdge(*From, *To); -} - -static bool handleEntryForStore(const llvm::StoreInst *Store, - TypeAssignmentGraph &TAG, TAGAliasInfo AI, - const llvm::DataLayout &DL) { - const auto *Base = Store->getValueOperand()->stripPointerCastsAndAliases(); - bool IsEntry = isVTableOrFun(Base); - - if (!IsEntry) - return false; - - if (const auto *GEPDest = - llvm::dyn_cast(Store->getPointerOperand())) { - if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { - TAG.TypeEntryPoints[*GEPNodeId].insert(Base); - - auto GEPNode = TAG[*GEPNodeId]; - if (const auto *FldDest = std::get_if(&GEPNode.Label)) { - auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); - - if (ApproxDest) - TAG.TypeEntryPoints[*ApproxDest].insert(Base); - } - } - } - - AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { - // TODO: Fuse store and GEP! - - auto DestNodeId = TAG.get({Variable{Dest}}); - if (!DestNodeId) - return; - - TAG.TypeEntryPoints[*DestNodeId].insert(Base); - }); - return true; -} - -static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, - TAGAliasInfo AI, const llvm::DataLayout &DL) { - - if (handleEntryForStore(Store, TAG, AI, DL)) - return; - - auto From = TAG.get({Variable{Store->getValueOperand()}}); - if (!From) - return; - - if (const auto *GEPDest = - llvm::dyn_cast(Store->getPointerOperand())) { - if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { - TAG.addEdge(*From, *GEPNodeId); - - auto GEPNode = TAG[*GEPNodeId]; - if (const auto *FldDest = std::get_if(&GEPNode.Label)) { - auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); - - if (ApproxDest) - TAG.addEdge(*From, *ApproxDest); - } - } - } - - AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { - // TODO: Fuse store and GEP! 
- - auto DestNodeId = TAG.get({Variable{Dest}}); - if (!DestNodeId) - return; - - TAG.addEdge(*From, *DestNodeId); - }); -} - -static void handleLoad(const llvm::LoadInst *Load, TypeAssignmentGraph &TAG, - const llvm::DataLayout &DL) { - auto To = TAG.get({Variable{Load}}); - if (!To) - return; - - auto From = TAG.get({Variable{Load->getPointerOperand()}}); - if (From) - TAG.addEdge(*From, *To); - - if (const auto *GEPDest = - llvm::dyn_cast(Load->getPointerOperand())) { - if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) - TAG.addEdge(*GEPNodeId, *To); - } -} - -static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { - auto To = TAG.get({Variable{Phi}}); - if (!To) - return; - - for (const auto &Inc : Phi->incoming_values()) { - auto From = TAG.get({Variable{Inc.get()}}); - if (From) - TAG.addEdge(*From, *To); - } -} - -static llvm::StringRef extractTypeName(llvm::StringRef CtorName) { - // Example: _ZN3OneC2Ev - - auto EndIdx = CtorName.rfind("C2E"); - if (EndIdx == llvm::StringRef::npos) - EndIdx = CtorName.rfind("C1E"); - - if (EndIdx == llvm::StringRef::npos) - EndIdx = CtorName.size(); - - auto StartIdx = EndIdx; - while (StartIdx) { - --StartIdx; - - if (llvm::isDigit(CtorName[StartIdx])) - break; - } - return CtorName.slice(StartIdx, EndIdx); -} -static llvm::StringRef extractTypeName(std::string &&) = delete; - -static const llvm::Value *getTypeFromDI(const llvm::DICompositeType *CompTy, - const llvm::Module &Mod, - const psr::LLVMVFTableProvider &VTP) { - if (!CompTy->getIdentifier().empty()) { - - std::string Buf; - auto TypeName = CompTy->getIdentifier(); - if (TypeName.startswith("_ZTS") || TypeName.startswith("_ZTI")) { - Buf = TypeName.str(); - Buf[3] = 'V'; - TypeName = Buf; - } - - if (const auto *GlobTV = Mod.getNamedGlobal(TypeName)) { - return GlobTV; - } - if (const auto *Alias = Mod.getNamedAlias(TypeName)) { - return Alias->getAliasee()->stripPointerCastsAndAliases(); - } - - return nullptr; - } - - auto ClearName = 
CompTy->getName().str(); - const auto *Scope = CompTy->getScope(); - while (llvm::isa_and_nonnull(Scope)) { - ClearName = Scope->getName().str().append("::").append(ClearName); - Scope = Scope->getScope(); - } - - return VTP.getVFTableGlobal(ClearName); -} - -static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, - TypeAssignmentGraph &TAG, - const llvm::Function *Callee, - const psr::LLVMVFTableProvider &VTP) { - - if (!psr::isHeapAllocatingFunction(Callee)) - return; - - if (const auto *MDNode = Call->getMetadata("heapallocsite")) { - - // Shortcut - if (const auto *CompTy = llvm::dyn_cast(MDNode); - CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || - CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { - - if (const auto *Ty = getTypeFromDI(CompTy, *Call->getModule(), VTP)) { - - TAG.TypeEntryPoints[CSNod].insert(Ty); - return; - } - } - } - // TODO: Fallback solution - - // llvm::SmallDenseSet Seen; - // llvm::SmallVector WL = {Call}; - - // // Search for the ctor call - - // const auto *CallerFun = Call->getFunction(); - - // while (!WL.empty()) { - // const auto *CurrObj = WL.pop_back_val(); - // for (const auto &Use : CurrObj->uses()) { - // const auto *User = llvm::dyn_cast(Use.getUser()); - // if (!User || User->getFunction() != CallerFun) - // continue; - - // if (const auto *Cast = llvm::dyn_cast(User); - // Cast && Cast->getDestTy()->isPointerTy()) { - // if (Seen.insert(Cast).second) - // WL.push_back(Cast); - - // continue; - // } - - // if (const auto *CtorCall = llvm::dyn_cast(User); - // CtorCall && CtorCall->getCalledFunction() && - // Use == CtorCall->getArgOperand(0)) { - // auto CtorName = CtorCall->getCalledFunction()->getName(); - // if (psr::isConstructor(CtorName)) { - // auto DemangledCtorName = llvm::demangle(CtorName.str()); - - // auto TypeName = extractTypeName(CtorName); - - // // TODO - // } - // // TODO: Extract type from ctor fun - // } - // } - // } -} - -static void handleCall(const 
llvm::CallBase *Call, TypeAssignmentGraph &TAG, - const psr::CallGraph &BaseCG, - const psr::LLVMVFTableProvider &VTP) { - - llvm::SmallVector> Args; - llvm::SmallBitVector EntryArgs; - bool HasArgNode = false; - - for (const auto &Arg : Call->args()) { - auto TN = TAG.get({Variable{Arg.get()}}); - Args.push_back(TN); - if (TN) - HasArgNode = true; - - bool IsEntry = isVTableOrFun(Arg.get()); - EntryArgs.push_back(IsEntry); - } - - auto CSNod = TAG.get({Variable{Call}}); - - // TODO: Handle struct returns that contain pointers - if (!HasArgNode && !CSNod) - return; - - for (const auto *Callee : BaseCG.getCalleesOfCallAt(Call)) { - handleEntryForCall(Call, *CSNod, TAG, Callee, VTP); - - for (const auto &[Param, Arg] : llvm::zip(Callee->args(), Args)) { - auto ParamNodId = TAG.get({Variable{&Param}}); - if (!ParamNodId) - continue; - - if (EntryArgs.test(Param.getArgNo())) { - TAG.TypeEntryPoints[*ParamNodId].insert( - Call->getArgOperand(Param.getArgNo()) - ->stripPointerCastsAndAliases()); - } - - if (!Arg) - continue; - - if (!Param.hasStructRetAttr()) - TAG.addEdge(*Arg, *ParamNodId); - - // if (!Param.hasByValAttr()) - // TAG.addEdge(*ParamNodId, *Arg); - } - if (CSNod) { - auto RetNod = TAG.get({Return{Callee}}); - if (RetNod) - TAG.addEdge(*RetNod, *CSNod); - } - } -} - -static void handleReturn(const llvm::ReturnInst *Ret, - TypeAssignmentGraph &TAG) { - - auto TNId = TAG.get({Return{Ret->getFunction()}}); - if (!TNId) - return; - - if (const auto *RetVal = Ret->getReturnValue()) { - const auto *Base = RetVal->stripPointerCastsAndAliases(); - if (isVTableOrFun(Base)) { - TAG.TypeEntryPoints[*TNId].insert(Base); - return; - } - - auto From = TAG.get({Variable{Base}}); - if (From) - TAG.addEdge(*From, *TNId); - } -} - -static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, - const psr::CallGraph &BaseCG, - TAGAliasInfo AI, const llvm::DataLayout &DL, - const psr::LLVMVFTableProvider &VTP) { - if (const auto *Alloca = llvm::dyn_cast(&I)) { - 
handleAlloca(Alloca, TAG, VTP); - return; - } - if (const auto *Load = llvm::dyn_cast(&I)) { - handleLoad(Load, TAG, DL); - return; - } - if (const auto *GEP = llvm::dyn_cast(&I)) { - handleGEP(GEP, TAG, DL); - return; - } - if (const auto *Store = llvm::dyn_cast(&I)) { - handleStore(Store, TAG, AI, DL); - return; - } - if (const auto *Phi = llvm::dyn_cast(&I)) { - handlePhi(Phi, TAG); - return; - } - if (const auto *Cast = llvm::dyn_cast(&I)) { - auto From = TAG.get({Variable{Cast->getOperand(0)}}); - auto To = TAG.get({Variable{Cast}}); - - if (From && To) - TAG.addEdge(*From, *To); - } - if (const auto *Call = llvm::dyn_cast(&I)) { - handleCall(Call, TAG, BaseCG, VTP); - return; - } - if (const auto *Ret = llvm::dyn_cast(&I)) { - handleReturn(Ret, TAG); - return; - } - // TODO: Handle more cases -} - -static void buildTAGWithFun( - const llvm::Function *Fun, TypeAssignmentGraph &TAG, - const psr::CallGraph - &BaseCG, - TAGAliasInfo AI, const llvm::DataLayout &DL, - const psr::LLVMVFTableProvider &VTP) { - for (const auto &I : llvm::instructions(Fun)) { - dispatch(I, TAG, BaseCG, AI, DL, VTP); - } -} - -static auto computeTypeAssignmentGraphImpl( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - TAGAliasInfo AI, const psr::LLVMVFTableProvider &VTP) - -> TypeAssignmentGraph { - TypeAssignmentGraph TAG; - - const auto &DL = Mod.getDataLayout(); - - addFields(Mod, TAG, DL); - addGlobals(Mod, TAG); - - for (const auto &Fun : Mod) { - initializeWithFun(&Fun, TAG); - } - - TAG.Adj.resize(TAG.Nodes.size()); - - for (const auto &Fun : Mod) { - buildTAGWithFun(&Fun, TAG, BaseCG, AI, DL, VTP); - } - - return TAG; -} - -auto analysis::call_graph::computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP) - -> TypeAssignmentGraph { - FilteredAliasSet FAS(AS); - return computeTypeAssignmentGraphImpl( - Mod, BaseCG, - [&FAS](const auto *Fact, const auto *At, 
TAGAliasHandler Handler) { - FAS.foreachAlias(Fact, At, Handler); - }, - VTP); -} - -auto analysis::call_graph::computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP) - -> TypeAssignmentGraph { - AliasInfo AI(&ObjGraph); - FilteredAliasSet FAS(AI.aliases()); - return computeTypeAssignmentGraphImpl( - Mod, BaseCG, - [&FAS](const auto *Fact, const auto *At, TAGAliasHandler Handler) { - FAS.foreachAlias(Fact, At, Handler); - }, - VTP); -} - -void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { - OS << "digraph TAG {\n"; - psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - - size_t Ctr = 0; - for (const auto &TN : Nodes) { - OS << " " << Ctr << "[label=\""; - printNode(OS, TN); - OS << "\"];\n"; - - ++Ctr; - } - - OS << '\n'; - - Ctr = 0; - for (const auto &Targets : Adj) { - for (auto Tgt : Targets) { - OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; - } - ++Ctr; - } -} diff --git a/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h b/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h deleted file mode 100644 index 1022fe00b7..0000000000 --- a/phasar/llvm/include/phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h +++ /dev/null @@ -1,150 +0,0 @@ -#pragma once - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/HashBuilder.h" -#include "llvm/Support/raw_ostream.h" - -#include "../../../../../utils/include/phasar/Utils/Compressor.h" -#include "../ControlFlow/CallGraph.h" -#include "../ControlFlow/LLVMVFTableProvider.h" -#include "../Pointer/LLVMAliasInfo.h" -#include "../TypeHierarchy/LLVMTypeHierarchy.h" - -#include -#include - -namespace psr { -class FilteredAliasSet; -} // namespace psr - 
-namespace psr::analysis::call_graph { - -enum class [[clang::enum_extensibility(open)]] TAGNodeId : uint32_t{}; - -struct Variable { - const llvm::Value *Val; -}; - -struct Field { - const llvm::Type *Base; - size_t ByteOffset; -}; - -struct Return { - const llvm::Function *Fun; -}; - -struct TAGNode { - std::variant Label; -}; - -constexpr bool operator==(Variable L, Variable R) noexcept { - return L.Val == R.Val; -} -constexpr bool operator==(Field L, Field R) noexcept { - return L.Base == R.Base && L.ByteOffset == R.ByteOffset; -} -constexpr bool operator==(Return L, Return R) noexcept { - return L.Fun == R.Fun; -} -constexpr bool operator==(TAGNode L, TAGNode R) noexcept { - return L.Label == R.Label; -} -}; // namespace psr::analysis::call_graph - -namespace llvm { -template <> struct DenseMapInfo { - using TAGNode = psr::analysis::call_graph::TAGNode; - using Variable = psr::analysis::call_graph::Variable; - using Field = psr::analysis::call_graph::Field; - using Return = psr::analysis::call_graph::Return; - - inline static TAGNode getEmptyKey() noexcept { - return {Variable{llvm::DenseMapInfo::getEmptyKey()}}; - } - inline static TAGNode getTombstoneKey() noexcept { - return { - Variable{llvm::DenseMapInfo::getTombstoneKey()}}; - } - inline static bool isEqual(TAGNode L, TAGNode R) noexcept { return L == R; } - inline static auto getHashValue(TAGNode TN) noexcept { - if (const auto *Var = std::get_if(&TN.Label)) { - return llvm::hash_combine(0, Var->Val); - } - if (const auto *Fld = std::get_if(&TN.Label)) { - return llvm::hash_combine(1, Fld->Base, Fld->ByteOffset); - } - if (const auto *Ret = std::get_if(&TN.Label)) { - return llvm::hash_combine(2, Ret->Fun); - } - llvm_unreachable("All TAGNode variants should be handled already"); - } -}; - -template <> struct DenseMapInfo { - using TAGNodeId = psr::analysis::call_graph::TAGNodeId; - inline static TAGNodeId getEmptyKey() noexcept { return TAGNodeId(-1); } - inline static TAGNodeId getTombstoneKey() 
noexcept { return TAGNodeId(-2); } - inline static bool isEqual(TAGNodeId L, TAGNodeId R) noexcept { - return L == R; - } - inline static auto getHashValue(TAGNodeId TN) noexcept { - return llvm::hash_value(uint32_t(TN)); - } -}; - -} // namespace llvm - -namespace psr::analysis::call_graph { -struct ObjectGraph; - -struct TypeAssignmentGraph { - - Compressor Nodes; - - llvm::SmallVector, 0> Adj; - llvm::SmallDenseMap> - TypeEntryPoints; - - [[nodiscard]] inline std::optional get(TAGNode TN) const noexcept { - return Nodes.getOrNull(TN); - } - - [[nodiscard]] inline TAGNode operator[](TAGNodeId Id) const noexcept { - return Nodes[Id]; - } - - inline void addEdge(TAGNodeId From, TAGNodeId To) { - assert(size_t(From) < Adj.size()); - assert(size_t(To) < Adj.size()); - - if (From == To) - return; - - Adj[size_t(From)].insert(To); - } - - void print(llvm::raw_ostream &OS); -}; - -[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP); - -[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP); - -void printNode(llvm::raw_ostream &OS, TAGNode TN); -}; // namespace psr::analysis::call_graph diff --git a/phasar/utils/include/phasar/Utils/Compressor.h b/phasar/utils/include/phasar/Utils/Compressor.h deleted file mode 100644 index edf87095e5..0000000000 --- a/phasar/utils/include/phasar/Utils/Compressor.h +++ /dev/null @@ -1,212 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. 
- * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#pragma once - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/SmallVector.h" - -#include "ByRef.h" -#include "TypeTraits.h" - -#include -#include -#include -#include -#include - -namespace psr { -template -class Compressor; - -template -class Compressor>> { -public: - void reserve(size_t Capacity) { - assert(Capacity <= UINT32_MAX); - ToInt.reserve(Capacity); - FromInt.reserve(Capacity); - } - - Id getOrInsert(T Elem) { - auto [It, Inserted] = ToInt.try_emplace(Elem, Id(ToInt.size())); - if (Inserted) { - FromInt.push_back(Elem); - } - return It->second; - } - - std::pair insert(T Elem) { - auto [It, Inserted] = ToInt.try_emplace(Elem, Id(ToInt.size())); - if (Inserted) { - FromInt.push_back(Elem); - } - return {It->second, Inserted}; - } - - [[nodiscard]] std::optional getOrNull(T Elem) const { - if (auto It = ToInt.find(Elem); It != ToInt.end()) { - return It->second; - } - return std::nullopt; - } - - [[nodiscard]] Id get(T Elem) const { - auto It = ToInt.find(Elem); - assert(It != ToInt.end()); - return It->second; - } - - [[nodiscard]] T operator[](Id Idx) const noexcept { - assert(size_t(Idx) < FromInt.size()); - return FromInt[size_t(Idx)]; - } - - [[nodiscard]] size_t size() const noexcept { return FromInt.size(); } - [[nodiscard]] size_t capacity() const noexcept { - return FromInt.capacity() + - ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type); - } - - auto begin() const noexcept { return FromInt.begin(); } - auto end() const noexcept { return FromInt.end(); } - -private: - llvm::DenseMap ToInt; - llvm::SmallVector FromInt; -}; - -template -class Compressor>> { -public: - void reserve(size_t Capacity) { - assert(Capacity <= UINT32_MAX); - ToInt.reserve(Capacity); - } - - Id getOrInsert(const T &Elem) { - if (auto It = ToInt.find(&Elem); It != ToInt.end()) { - 
return It->second; - } - auto Ret = Id(FromInt.size()); - auto *Ins = &FromInt.emplace_back(Elem); - ToInt[Ins] = Ret; - return Ret; - } - - Id getOrInsert(T &&Elem) { - if (auto It = ToInt.find(&Elem); It != ToInt.end()) { - return It->second; - } - auto Ret = Id(FromInt.size()); - auto *Ins = &FromInt.emplace_back(std::move(Elem)); - ToInt[Ins] = Ret; - return Ret; - } - - std::pair insert(const T &Elem) { - if (auto It = ToInt.find(&Elem); It != ToInt.end()) { - return {It->second, false}; - } - auto Ret = Id(FromInt.size()); - auto *Ins = &FromInt.emplace_back(Elem); - ToInt[Ins] = Ret; - return {Ret, true}; - } - - std::pair insert(T &&Elem) { - if (auto It = ToInt.find(&Elem); It != ToInt.end()) { - return {It->second, false}; - } - auto Ret = Id(FromInt.size()); - auto *Ins = &FromInt.emplace_back(std::move(Elem)); - ToInt[Ins] = Ret; - return {Ret, true}; - } - - [[nodiscard]] std::optional getOrNull(const T &Elem) const { - if (auto It = ToInt.find(&Elem); It != ToInt.end()) { - return It->second; - } - return std::nullopt; - } - - [[nodiscard]] Id get(const T &Elem) const { - auto It = ToInt.find(&Elem); - assert(It != ToInt.end()); - return It->second; - } - - const T &operator[](Id Idx) const noexcept { - assert(size_t(Idx) < FromInt.size()); - return FromInt[size_t(Idx)]; - } - - [[nodiscard]] size_t size() const noexcept { return FromInt.size(); } - [[nodiscard]] size_t capacity() const noexcept { - return FromInt.size() + - ToInt.getMemorySize() / sizeof(typename decltype(ToInt)::value_type); - } - - auto begin() const noexcept { return FromInt.begin(); } - auto end() const noexcept { return FromInt.end(); } - -private: - struct DSI : llvm::DenseMapInfo { - static auto getHashValue(const T *Elem) noexcept { - assert(Elem != nullptr); - if constexpr (has_llvm_dense_map_info) { - return llvm::DenseMapInfo::getHashValue(*Elem); - } else { - return std::hash{}(*Elem); - } - } - static auto isEqual(const T *LHS, const T *RHS) noexcept { - if (LHS == RHS) 
{ - return true; - } - if (LHS == DSI::getEmptyKey() || LHS == DSI::getTombstoneKey() || - RHS == DSI::getEmptyKey() || RHS == DSI::getTombstoneKey()) { - return false; - } - if constexpr (has_llvm_dense_map_info) { - return llvm::DenseMapInfo::isEqual(*LHS, *RHS); - } else { - return *LHS == *RHS; - } - } - }; - - std::deque FromInt; - llvm::DenseMap ToInt; -}; - -struct NoneCompressor final { - constexpr NoneCompressor() noexcept = default; - - template >> - constexpr NoneCompressor(const T & /*unused*/) noexcept {} - - template - [[nodiscard]] decltype(auto) getOrInsert(T &&Val) const noexcept { - return std::forward(Val); - } - template - [[nodiscard]] decltype(auto) operator[](T &&Val) const noexcept { - return std::forward(Val); - } - void reserve(size_t /*unused*/) const noexcept {} - - [[nodiscard]] size_t size() const noexcept { return 0; } - [[nodiscard]] size_t capacity() const noexcept { return 0; } -}; - -} // namespace psr diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp b/unittests/Utils/SCCGenericTest.cpp similarity index 100% rename from lib/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.cpp rename to unittests/Utils/SCCGenericTest.cpp From ee6d359b4abbeeb79148e62a3baaa4abdfb181de Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Sun, 1 Sep 2024 23:43:49 +0200 Subject: [PATCH 05/27] updated template types --- .../PhasarLLVM/ControlFlow/TypePropagator.h | 20 +++--- .../PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 1 - .../DataFlow/IfdsIde/TypePropagator.cpp | 24 ++++--- unittests/Utils/SCCGenericTest.cpp | 72 ++++++++++--------- 4 files changed, 64 insertions(+), 53 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h index 7984d80a4e..400a9d61ea 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h +++ b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h @@ -9,6 +9,8 @@ #pragma once +#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" + 
#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" @@ -20,20 +22,22 @@ class Value; namespace psr::analysis::call_graph { struct TypeAssignmentGraph; -struct SCCHolder; -struct SCCCallers; +template struct SCCHolder; +template struct SCCCallers; struct SCCOrder; struct TypeAssignment { llvm::SmallVector, 0> TypesPerSCC; - LLVM_LIBRARY_VISIBILITY void print(llvm::raw_ostream &OS, - const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs); + LLVM_LIBRARY_VISIBILITY void + print(llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs); }; -[[nodiscard]] LLVM_LIBRARY_VISIBILITY TypeAssignment -propagateTypes(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, - const SCCCallers &Deps, const SCCOrder &Order); +[[nodiscard]] LLVM_LIBRARY_VISIBILITY TypeAssignment propagateTypes( + const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs, + const SCCCallers &Deps, + const SCCOrder &Order); } // namespace psr::analysis::call_graph diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h index f7250a6948..5a29b01d8f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h @@ -161,7 +161,6 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, } // Curr.second > 0 implies that we came back from a recursive call if (Curr.second > 0) { - //??? 
setMin(Data.Low[size_t(Curr.first)], Data.Low[size_t(Curr.second) - 1]); } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp index ffa934bedb..b032b19660 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp @@ -13,21 +13,23 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" #include "phasar/PhasarLLVM/Utils/Compressor.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" using namespace psr; using namespace psr::analysis::call_graph; -static void initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { +static void +initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { for (const auto &[Node, Types] : TAG.TypeEntryPoints) { auto SCC = SCCs.SCCOfNode[size_t(Node)]; TA.TypesPerSCC[size_t(SCC)].insert(Types.begin(), Types.end()); } } -static void propagate(TypeAssignment &TA, const SCCCallers &Deps, - SCCId CurrSCC) { +static void +propagate(TypeAssignment &TA, + const SCCCallers &Deps, + SCCId CurrSCC) { const auto &Types = TA.TypesPerSCC[size_t(CurrSCC)]; if (Types.empty()) return; @@ -38,8 +40,10 @@ static void propagate(TypeAssignment &TA, const SCCCallers &Deps, } TypeAssignment analysis::call_graph::propagateTypes( - const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, - const SCCCallers &Deps, const SCCOrder &Order) { + const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs, + const SCCCallers &Deps, + const SCCOrder &Order) { TypeAssignment Ret; Ret.TypesPerSCC.resize(SCCs.NumSCCs); @@ -51,9 +55,9 @@ TypeAssignment analysis::call_graph::propagateTypes( return Ret; } -void TypeAssignment::print(llvm::raw_ostream &OS, - const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { +void TypeAssignment::print( + llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { 
OS << "digraph TypeAssignment {\n"; psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index f973b1216c..1148b5121d 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -7,54 +7,58 @@ * Fabian Schiebel and other *****************************************************************************/ -// #include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" -#include "SCCGeneric.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" -// #include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" +#include "gtest/gtest.h" -#include - -// #include "phasar/PhasarLLVM/Utils/Compressor.h" -// #include "phasar/PhasarLLVM/Utils/TypeAssignmentGraph.h" - -#include "llvm/ADT/SmallBitVector.h" - -#include -#include #include +#include + +//===----------------------------------------------------------------------===// +// Unit tests for the Igeneric SCC algorithm using namespace psr; using SCCId = analysis::call_graph::SCCId; +enum class [[clang::enum_extensibility(open)]] NodeId : uint32_t{}; class ExampleGraph { public: + using GraphNodeId = NodeId; + ExampleGraph() = default; - std::vector - getEdges(const analysis::call_graph::GraphNodeId ID) const { + [[nodiscard]] std::vector getEdges(const GraphNodeId ID) const { return Adj[uint32_t(ID)]; } - std::vector> Adj; + std::vector> Adj; }; -int main() { - ExampleGraph Gr; - std::vector> list = { - {analysis::call_graph::GraphNodeId(2)}, - {analysis::call_graph::GraphNodeId(0)}, - {analysis::call_graph::GraphNodeId(1)}, - {analysis::call_graph::GraphNodeId(1), - analysis::call_graph::GraphNodeId(2)}, - {analysis::call_graph::GraphNodeId(1)}, - {analysis::call_graph::GraphNodeId(4), - analysis::call_graph::GraphNodeId(6)}, - {analysis::call_graph::GraphNodeId(4), - analysis::call_graph::GraphNodeId(7)}, - {analysis::call_graph::GraphNodeId(5)}}; - - auto Output = analysis::call_graph::execTarjan(Gr, false); 
- std::cout << Output.NumSCCs; - auto Out = analysis::call_graph::execTarjan(Gr, true); - std::cout << Out.NumSCCs; +TEST(SCCGenericTest, SCCTest) { + ExampleGraph Graph; + std::vector> list = { + {ExampleGraph::GraphNodeId(2)}, + {ExampleGraph::GraphNodeId(0)}, + {ExampleGraph::GraphNodeId(1)}, + {ExampleGraph::GraphNodeId(1), ExampleGraph::GraphNodeId(2)}, + {ExampleGraph::GraphNodeId(1)}, + {ExampleGraph::GraphNodeId(4), ExampleGraph::GraphNodeId(6)}, + {ExampleGraph::GraphNodeId(4), ExampleGraph::GraphNodeId(7)}, + {ExampleGraph::GraphNodeId(5)}}; + + auto OutputRec = analysis::call_graph::execTarjan(Graph, false); + auto OutputIt = analysis::call_graph::execTarjan(Graph, true); + ASSERT_EQ(OutputRec.NumSCCs, OutputIt.NumSCCs) + << "Unequal number of SCC components\n" + << "Abort Test\n"; + for (int ID = 0; ID < Graph.Adj.size(); ID++) { + EXPECT_EQ(OutputRec.SCCOfNode[ID], OutputIt.SCCOfNode[ID]) + << "SCCs differ at Index: " << std::to_string(ID) << "\n"; + } +} + +// main function for the test case +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); } From 62bb414edd1bfd590e990c82d1d27092a4aca7b1 Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Wed, 4 Sep 2024 13:31:44 +0200 Subject: [PATCH 06/27] error corrections --- .../ControlFlow/LLVMVFTableProvider.h | 9 ++ .../ControlFlow/TypeAssignmentGraph.h | 7 +- .../PhasarLLVM/ControlFlow/TypePropagator.h | 7 +- .../PhasarLLVM/Utils/FilteredAliasSet.h | 10 +- .../{PhasarLLVM => }/Utils/Compressor.h | 5 +- include/phasar/Utils/SCCGeneric.h | 146 ++++++++---------- include/phasar/Utils/TypeTraits.h | 3 +- .../ControlFlow/LLVMVFTableProvider.cpp | 15 ++ .../DataFlow/IfdsIde/TypeAssignmentGraph.cpp | 8 +- .../DataFlow/IfdsIde/TypePropagator.cpp | 17 +- lib/PhasarLLVM/Utils/FilteredAliasSet.cpp | 121 +++++++++++++++ unittests/Utils/CMakeLists.txt | 1 + unittests/Utils/SCCGenericTest.cpp | 33 ++-- 13 files changed, 267 insertions(+), 115 deletions(-) rename 
include/phasar/{PhasarLLVM => }/Utils/Compressor.h (98%) create mode 100644 lib/PhasarLLVM/Utils/FilteredAliasSet.cpp diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h index 4206070fc1..a7b5c3c595 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h @@ -14,6 +14,8 @@ #include +#include + namespace llvm { class Module; class StructType; @@ -32,8 +34,15 @@ class LLVMVFTableProvider { [[nodiscard]] const LLVMVFTable * getVFTableOrNull(const llvm::StructType *Type) const; + [[nodiscard]] const llvm::GlobalVariable * + getVFTableGlobal(const llvm::StructType *Type) const; + + [[nodiscard]] const llvm::GlobalVariable * + getVFTableGlobal(const std::string &ClearTypeName) const; + private: std::unordered_map TypeVFTMap; + llvm::StringMap ClearNameTVMap; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h index 667b23ac42..0a32fd465d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h @@ -7,13 +7,14 @@ * Fabian Schiebel and other *****************************************************************************/ -#pragma once +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_TYPEASSIGNMENTGRAPH_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_TYPEASSIGNMENTGRAPH_H #include "phasar/ControlFlow/CallGraph.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" -#include "phasar/PhasarLLVM/Utils/Compressor.h" +#include "phasar/Utils/Compressor.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" @@ -160,3 +161,5 @@ struct TypeAssignmentGraph { void printNode(llvm::raw_ostream &OS, TAGNode TN); }; // 
namespace psr::analysis::call_graph + +#endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h index 400a9d61ea..7896c86335 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h +++ b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h @@ -7,7 +7,8 @@ * Fabian Schiebel and other *****************************************************************************/ -#pragma once +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_TYPEPROPAGATOR_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_TYPEPROPAGATOR_H #include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" @@ -37,7 +38,9 @@ struct TypeAssignment { [[nodiscard]] LLVM_LIBRARY_VISIBILITY TypeAssignment propagateTypes( const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, - const SCCCallers &Deps, + const SCCCallers + &Deps, const SCCOrder &Order); } // namespace psr::analysis::call_graph +#endif diff --git a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h index eb62bf077f..feb532f6e1 100644 --- a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h +++ b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h @@ -7,10 +7,10 @@ * Fabian Schiebel and other *****************************************************************************/ -// #include "TypeAssignmentGraph.h" - -#pragma once +#ifndef PHASAR_PHASARLLVM_UTILS_FILTEREDALIASSET_H +#define PHASAR_PHASARLLVM_UTILS_FILTEREDALIASSET_H +#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "llvm/ADT/DenseSet.h" @@ -25,7 +25,7 @@ class Instruction; } // namespace llvm namespace psr { -// Belongs into phasar! 
+ class FilteredAliasSet { public: using d_t = const llvm::Value *; @@ -64,3 +64,5 @@ class FilteredAliasSet { alias_info_t PT; }; } // namespace psr + +#endif diff --git a/include/phasar/PhasarLLVM/Utils/Compressor.h b/include/phasar/Utils/Compressor.h similarity index 98% rename from include/phasar/PhasarLLVM/Utils/Compressor.h rename to include/phasar/Utils/Compressor.h index 1218c228e7..2898393941 100644 --- a/include/phasar/PhasarLLVM/Utils/Compressor.h +++ b/include/phasar/Utils/Compressor.h @@ -7,7 +7,9 @@ * Fabian Schiebel and other *****************************************************************************/ -// nach phasar utils +#ifndef PHASAR_UTILS_COMPRESSOR_H +#define PHASAR_UTILS_COMPRESSOR_H + #include "phasar/Utils/ByRef.h" #include "phasar/Utils/TypeTraits.h" @@ -209,3 +211,4 @@ struct NoneCompressor final { }; } // namespace psr +#endif diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 197acf15e5..12a82d5ac3 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -7,13 +7,13 @@ * Fabian Schiebel and other *****************************************************************************/ -#pragma once -// error in included header Compressor.h -// #include "phasar/PhasarLLVM/Utils/Compressor.h" +#ifndef PHASAR_UTILS_SCCGENERIC_H +#define PHASAR_UTILS_SCCGENERIC_H -#include "phasar/Utils/Utilities.h" +#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" @@ -29,13 +29,11 @@ class LLVMBasedICFG; } // namespace psr namespace psr::analysis::call_graph { -// struct TypeAssignmentGraph; -enum class GraphNodeId : uint32_t; enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; // holds the scc's of a given graph -struct SCCHolder { +template struct SCCHolder { llvm::SmallVector SCCOfNode{}; 
llvm::SmallVector> NodesInSCC{}; size_t NumSCCs = 0; @@ -43,12 +41,12 @@ struct SCCHolder { // holds a graph were the scc's are compressed to a single node. Resulting graph // is a DAG -struct SCCCallers { +template struct SCCCallers { llvm::SmallVector, 0> ChildrenOfSCC{}; llvm::SmallVector SCCRoots{}; - template - void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph); + void print(llvm::raw_ostream &OS, + const SCCHolder &SCCs, const G &Graph); }; // holds topologically sorted scccallers @@ -56,7 +54,7 @@ struct SCCOrder { llvm::SmallVector SCCIds; }; -struct SCCData { +template struct SCCData { llvm::SmallVector Disc; llvm::SmallVector Low; llvm::SmallBitVector OnStack; @@ -69,7 +67,7 @@ struct SCCData { Seen(NumFuns) {} }; -struct SCCDataIt { +template struct SCCDataIt { llvm::SmallVector Disc; llvm::SmallVector Low; llvm::SmallBitVector OnStack; @@ -83,7 +81,7 @@ struct SCCDataIt { Seen(NumFuns) {} }; -static void setMin(uint32_t &InOut, uint32_t Other) { +constexpr void setMin(uint32_t &InOut, uint32_t Other) { if (Other < InOut) { InOut = Other; } @@ -91,8 +89,9 @@ static void setMin(uint32_t &InOut, uint32_t Other) { // TODO: Non-recursive version template -static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, - SCCHolder &Holder) { +static void computeSCCsRec(const G &Graph, typename G::GraphNodeId CurrNode, + SCCData &Data, + SCCHolder &Holder) { // See // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components @@ -139,39 +138,41 @@ static void computeSCCsRec(const G &Graph, GraphNodeId CurrNode, SCCData &Data, } } -// Iterative Implementation for Tarjan's SCC Alg. +// Iterative IMplementation for Tarjan's SCC Alg. // -> Heapoverflow through simulated Stack? 
template -static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { - +static void tarjanIt(const G &Graph, SCCDataIt &Data, + SCCHolder &Holder) { + using GraphNodeId = typename G::GraphNodeId; auto CurrTime = Data.Time; - for (uint32_t Vertex = 0; Vertex < Graph.Nodes.size(); Vertex++) { - if (Data.Disc[size_t(Vertex)] == UINT32_MAX) { + for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { + if (Data.Disc[Vertex] == UINT32_MAX) { Data.CallStack.push_back({GraphNodeId(Vertex), 0}); while (!Data.CallStack.empty()) { auto Curr = Data.CallStack.pop_back_val(); - // Curr.second = 0 implies that Curr.fist was not visited before + // Curr.second = 0 implies that node Curr.fist was not visited before if (Curr.second == 0) { - Data.Disc[size_t(Curr.first)] = CurrTime; - Data.Low[size_t(Curr.first)] = CurrTime; + Data.Disc[Curr.first] = CurrTime; + Data.Low[Curr.first] = CurrTime; CurrTime++; Data.Stack.push_back(Curr.first); - Data.OnStack.set(uint32_t(Curr.first)); + Data.OnStack.set(Curr.first); } - // Curr.second > 0 implies that we came back from a recursive call + // Curr.second > 0 implies that we came back from a recursive call of + // node with higher depth if (Curr.second > 0) { - setMin(Data.Low[size_t(Curr.first)], - Data.Low[size_t(Curr.second) - 1]); + setMin(Data.Low[Curr.first], Data.Low[Curr.second - 1]); } - // find the next recursive function call + // find the next node for recursion while (Curr.second < Graph.getEdges(Curr.first).size() && - Data.Disc[size_t(Graph.getEdges(Curr.first)[Curr.second])]) { + Data.Disc[Graph.getEdges(Curr.first)[Curr.second]] != + UINT32_MAX) { GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; - if (Data.OnStack.test(uint32_t(W))) { - setMin(Data.Low[size_t(Curr.first)], Data.Disc[size_t(W)]); + if (Data.OnStack.test(W)) { + setMin(Data.Low[Curr.first], Data.Disc[W]); } Curr.second++; - // If a Node u is undiscovered i.e. 
Data.Disc[size_t(u)] = UINT32_MAX + // If a Node u is undiscovered i.e. Data.Disc[u] = UINT32_MAX // start a recursive function call if (Curr.second < Graph.getEdges(Curr.first).size()) { GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; @@ -179,8 +180,8 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { Data.CallStack.push_back({U, 0}); } // If Curr.first is the root of a connected component i.e. Data.Disc = - // Data.Low - if (Data.Low[size_t(Curr.first)] == Data.Disc[size_t(Curr.first)]) { + // Data.Low i.e. cycle found + if (Data.Low[Curr.first] == Data.Disc[Curr.first]) { //-> SCC found auto SCCIdx = SCCId(Holder.NumSCCs++); auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); @@ -207,10 +208,11 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, SCCHolder &Holder) { } } -template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { - SCCHolder Ret{}; +template +[[nodiscard]] SCCHolder computeSCCs(const G &Graph) { + SCCHolder Ret{}; - auto NumNodes = Graph.Nodes.size(); + auto NumNodes = Graph.Adj.size(); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { @@ -220,7 +222,7 @@ template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { SCCData Data(NumNodes); for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { if (!Data.Seen.test(FunId)) { - computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + computeSCCsRec(Graph, G::GraphNodeId(FunId), Data, Ret); } } @@ -229,10 +231,12 @@ template [[nodiscard]] SCCHolder computeSCCs(const G &Graph) { // choose which Tarjan implementation will be executed template -[[nodiscard]] SCCHolder execTarjan(const G &Graph, const bool Iterative) { - SCCHolder Ret{}; +[[nodiscard]] SCCHolder +execTarjan(const G &Graph, const bool Iterative) { + using GraphNodeId = typename G::GraphNodeId; + SCCHolder Ret{}; - auto NumNodes = Graph.Nodes.size(); + auto NumNodes = Graph.Adj.size(); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { @@ -240,11 +244,14 @@ template } SCCData Data(NumNodes); + 
SCCDataIt DataIt(NumNodes); for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (!Data.Seen.test(FunId)) { - if (Iterative) { - TarjanIt(Graph, GraphNodeId(FunId), Data, Ret); - } else { + if (Iterative) { + if (!DataIt.Senn.text(FunId)) { + tarjanIt(Graph, DataIt, Ret); + } + } else { + if (!Data.Seen.test(FunId)) { computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); } } @@ -254,12 +261,15 @@ template } template -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers -computeSCCCallers(const G &Graph, const SCCHolder &SCCs); +[[nodiscard]] SCCCallers +computeSCCCallers(const G &Graph, + const SCCHolder &SCCs); template -auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { - SCCCallers Ret; +auto computeSCCCallers(const G &Graph, + const SCCHolder &SCCs) + -> SCCCallers { + SCCCallers Ret; Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); llvm::SmallBitVector Roots(SCCs.NumSCCs, true); @@ -288,36 +298,12 @@ auto computeSCCCallers(const G &Graph, const SCCHolder &SCCs) -> SCCCallers { } template -void analysis::call_graph::SCCCallers::print(llvm::raw_ostream &OS, - const SCCHolder &SCCs, - const G &Graph) { - OS << "digraph SCCTAG {\n"; - psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { - OS << " " << Ctr << "[label=\""; - for (auto TNId : SCCs.NodesInSCC[Ctr]) { - auto TN = Graph.Nodes[TNId]; - printNode(OS, TN); - OS << "\\n"; - } - OS << "\"];\n"; - } - - OS << '\n'; - - size_t Ctr = 0; - for (const auto &Targets : ChildrenOfSCC) { - for (auto Tgt : Targets) { - OS << " " << Ctr << "->" << uint32_t(Tgt) << ";\n"; - } - ++Ctr; - } -} - -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder -computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers); - -inline auto computeSCCOrder(const SCCHolder &SCCs, const SCCCallers &Callers) +[[nodiscard]] SCCOrder +computeSCCOrder(const SCCHolder &SCCs, + const SCCCallers &Callers); +template +inline auto computeSCCOrder(const SCCHolder &SCCs, + const 
SCCCallers &Callers) -> SCCOrder { SCCOrder Ret; Ret.SCCIds.reserve(SCCs.NumSCCs); @@ -359,3 +345,5 @@ template <> struct DenseMapInfo { static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } }; } // namespace llvm + +#endif diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 4f10c3b9bb..a47167dca6 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -259,8 +259,7 @@ template static constexpr size_t variant_idx = detail::variant_idx::value; template -static constexpr bool has_llvm_dense_map_info = - detail::has_llvm_dense_map_info::value; +PSR_CONCEPT has_llvm_dense_map_info = detail::has_llvm_dense_map_info::value; struct TrueFn { template diff --git a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp index b34384bd22..c46f4b2d9f 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp @@ -63,3 +63,18 @@ LLVMVFTableProvider::getVFTableOrNull(const llvm::StructType *Type) const { auto It = TypeVFTMap.find(Type); return It != TypeVFTMap.end() ? 
&It->second : nullptr; } + +const llvm::GlobalVariable * +LLVMVFTableProvider::getVFTableGlobal(const llvm::StructType *Type) const { + auto Name = LLVMTypeHierarchy::removeStructOrClassPrefix(*Type); + return getVFTableGlobal(Name); +} + +const llvm::GlobalVariable * +LLVMVFTableProvider::getVFTableGlobal(const std::string &ClearTypeName) const { + if (auto It = ClearNameTVMap.find(ClearTypeName); + It != ClearNameTVMap.end()) { + return It->second; + } + return nullptr; +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp index a8a30b1d38..5aea14efc4 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp @@ -14,6 +14,7 @@ #include "phasar/PhasarLLVM/Utils/AliasSets.h" #include "phasar/PhasarLLVM/Utils/FilteredAliasSet.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Logger.h" #include "phasar/Utils/Utilities.h" #include "llvm/ADT/STLExtras.h" @@ -93,9 +94,10 @@ getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { if (CurrTy->isPointerTy()) { size_t Idx = CurrByteOffs / PointerSize; if (CurrByteOffs % PointerSize) [[unlikely]] { - llvm::errs() << "[WARNING][getPointerIndicesOfType]: Unaligned pointer " - "found at offset " - << CurrByteOffs << " in type " << *Ty; + PHASAR_LOG_LEVEL(WARNING, "Unaligned pointer.."); + /*llvm::errs() << "[WARNING][getPointerIndicesOfType]: Unaligned pointer + " "found at offset " + << CurrByteOffs << " in type " << *Ty;*/ } assert(Ret.size() > Idx && "reserved unsufficient space for pointer indices"); diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp index b032b19660..4fe8f959aa 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp @@ -11,8 +11,8 @@ #include 
"phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" -#include "phasar/PhasarLLVM/Utils/Compressor.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Compressor.h" using namespace psr; using namespace psr::analysis::call_graph; @@ -26,13 +26,15 @@ initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, } } -static void -propagate(TypeAssignment &TA, - const SCCCallers &Deps, - SCCId CurrSCC) { +static void propagate( + TypeAssignment &TA, + const SCCCallers + &Deps, + SCCId CurrSCC) { const auto &Types = TA.TypesPerSCC[size_t(CurrSCC)]; - if (Types.empty()) + if (Types.empty()) { return; + } for (auto Succ : Deps.ChildrenOfSCC[size_t(CurrSCC)]) { TA.TypesPerSCC[size_t(Succ)].insert(Types.begin(), Types.end()); @@ -42,7 +44,8 @@ propagate(TypeAssignment &TA, TypeAssignment analysis::call_graph::propagateTypes( const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, - const SCCCallers &Deps, + const SCCCallers + &Deps, const SCCOrder &Order) { TypeAssignment Ret; Ret.TypesPerSCC.resize(SCCs.NumSCCs); diff --git a/lib/PhasarLLVM/Utils/FilteredAliasSet.cpp b/lib/PhasarLLVM/Utils/FilteredAliasSet.cpp new file mode 100644 index 0000000000..84becb7a85 --- /dev/null +++ b/lib/PhasarLLVM/Utils/FilteredAliasSet.cpp @@ -0,0 +1,121 @@ +#include "phasar/PhasarLLVM/Utils/FilteredAliasSet.h" + +#include "llvm/IR/Argument.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" + +namespace psr { + +template +void foreachFilteredAliasSetImpl( + const llvm::Value *Fact, const llvm::Instruction *At, + llvm::function_ref WithAlias, + FilteredAliasSet::alias_info_ref_t PT) { + + const auto *Base = Fact->stripPointerCastsAndAliases(); + + static constexpr auto GetFunction = + [](const llvm::Value *V) -> const llvm::Function * { + if (const auto *Inst = llvm::dyn_cast(V)) { + return Inst->getFunction(); + } + 
if (const auto *Arg = llvm::dyn_cast(V)) { + return Arg->getParent(); + } + return nullptr; + }; + + // If mustNoalias is false, then p1 and p2 may alias. If mustNoalias is true, + // then p1 and p2 definitely are not aliases. + static constexpr auto MustNoalias = [](const llvm::Value *P1, + const llvm::Value *P2) { + if (P1 == P2) { + return false; + } + assert(P1); + assert(P2); + if (const auto *Alloca1 = llvm::dyn_cast(P1)) { + if (llvm::isa(P2)) { + return true; + } + if (const auto *Alloca2 = llvm::dyn_cast(P2)) { + return !Alloca1->getAllocatedType()->isPointerTy() && + !Alloca2->getAllocatedType()->isPointerTy(); + } + } else if (const auto *Glob1 = llvm::dyn_cast(P1)) { + if (llvm::isa(P2) || Glob1->isConstant()) { + return true; + } + if (const auto *Glob2 = llvm::dyn_cast(P2)) { + return true; // approximation + } + } else if (const auto *Glob2 = llvm::dyn_cast(P2)) { + return Glob2->isConstant(); + } + + return false; + }; + + const auto *FactFun = At ? At->getFunction() : GetFunction(Fact); + + PT(Fact, [&](const llvm::Value *Alias) { + if (FactFun) { + // Skip inter-procedural aliases + const auto *AliasFun = GetFunction(Alias); + if (FactFun != AliasFun && AliasFun) { + return; + } + } + if (Fact == Alias) { + WithAlias(Alias); + return; + } + + const auto *AliasBase = Alias->stripPointerCastsAndAliases(); + if (MustNoalias(Base, AliasBase)) { + return; + } + + // bool IsMatching = false; + // auto Res = PT.alias(Fact, Alias, At); + + // if constexpr (MustAlias) { + // IsMatching = Res == psr::AliasResult::MustAlias; + // } else { + // IsMatching = Res != psr::AliasResult::NoAlias; + // } + // if (IsMatching) { + // WithAlias(Alias); + // } + WithAlias(Alias); + + if (const auto *Load = llvm::dyn_cast(Alias)) { + WithAlias(Load->getPointerOperand()); + } + }); +} + +auto FilteredAliasSet::getAliasSet(d_t Val, n_t At) -> container_type { + container_type Ret; + foreachFilteredAliasSetImpl( + Val, At, [&Ret](d_t Alias) { Ret.insert(Alias); }, PT); + 
return Ret; +} +// auto FilteredAliasSet::getMustAliasSet(d_t Val, n_t At) -> container_type { +// container_type Ret; +// foreachFilteredAliasSetImpl( +// Val, At, [&Ret](d_t Alias) { Ret.insert(Alias); }, PT); +// return Ret; +// } + +void FilteredAliasSet::foreachAlias(d_t Fact, n_t At, + llvm::function_ref WithAlias) { + foreachFilteredAliasSetImpl(Fact, At, WithAlias, PT); +} +// void FilteredAliasSet::foreachdMustAlias( +// d_t Fact, n_t At, llvm::function_ref WithAlias) { +// foreachFilteredAliasSetImpl(Fact, At, WithAlias, PT); +// } +} // namespace psr diff --git a/unittests/Utils/CMakeLists.txt b/unittests/Utils/CMakeLists.txt index 73c042570a..d84306e783 100644 --- a/unittests/Utils/CMakeLists.txt +++ b/unittests/Utils/CMakeLists.txt @@ -10,6 +10,7 @@ set(UtilsSources AnalysisPrinterTest.cpp OnTheFlyAnalysisPrinterTest.cpp SourceMgrPrinterTest.cpp + SCCGenericTest.cpp ) if(PHASAR_ENABLE_DYNAMIC_LOG) diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 1148b5121d..1f59fc55bc 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -7,10 +7,11 @@ * Fabian Schiebel and other *****************************************************************************/ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" +#include "phasar/Utils/SCCGeneric.h" #include "gtest/gtest.h" +#include #include #include @@ -35,23 +36,25 @@ class ExampleGraph { }; TEST(SCCGenericTest, SCCTest) { - ExampleGraph Graph; - std::vector> list = { - {ExampleGraph::GraphNodeId(2)}, - {ExampleGraph::GraphNodeId(0)}, - {ExampleGraph::GraphNodeId(1)}, - {ExampleGraph::GraphNodeId(1), ExampleGraph::GraphNodeId(2)}, - {ExampleGraph::GraphNodeId(1)}, - {ExampleGraph::GraphNodeId(4), ExampleGraph::GraphNodeId(6)}, - {ExampleGraph::GraphNodeId(4), ExampleGraph::GraphNodeId(7)}, - {ExampleGraph::GraphNodeId(5)}}; + using GraphNodeId = ExampleGraph::GraphNodeId; + ExampleGraph Graph{{{GraphNodeId(2)}, + {GraphNodeId(0)}, + 
{GraphNodeId(1)}, + {GraphNodeId(1), GraphNodeId(2)}, + {GraphNodeId(1)}, + {GraphNodeId(4), GraphNodeId(6)}, + {GraphNodeId(4), GraphNodeId(7)}, + {GraphNodeId(5)}}}; auto OutputRec = analysis::call_graph::execTarjan(Graph, false); auto OutputIt = analysis::call_graph::execTarjan(Graph, true); - ASSERT_EQ(OutputRec.NumSCCs, OutputIt.NumSCCs) - << "Unequal number of SCC components\n" - << "Abort Test\n"; - for (int ID = 0; ID < Graph.Adj.size(); ID++) { + ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) + << "Iterative Approach did not reach all nodes\n"; + ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) + << "Recursive Approach did not reach all nodes\n"; + EXPECT_EQ(OutputRec.NumSCCs, OutputIt.NumSCCs) + << "Unequal number of SCC components\n"; + for (size_t ID = 0; ID < Graph.Adj.size(); ID++) { EXPECT_EQ(OutputRec.SCCOfNode[ID], OutputIt.SCCOfNode[ID]) << "SCCs differ at Index: " << std::to_string(ID) << "\n"; } From 7bcd34a8c25399c8963305e7d7555c6a792263ea Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Fri, 6 Sep 2024 21:16:35 +0200 Subject: [PATCH 07/27] minor corrections --- .../ControlFlow/TypeAssignmentGraph.h | 9 +- .../PhasarLLVM/ControlFlow/TypePropagator.h | 16 +- include/phasar/PhasarLLVM/Utils/AliasSets.h | 41 ---- .../PhasarLLVM/Utils/FilteredAliasSet.h | 4 +- include/phasar/Utils/BitSet.h | 215 ++++++++++++++++++ include/phasar/Utils/SCCGeneric.h | 99 ++++---- include/phasar/Utils/TypeTraits.h | 8 +- include/phasar/Utils/TypedVector.h | 109 +++++++++ .../DataFlow/IfdsIde/TypeAssignmentGraph.cpp | 122 +++++----- .../DataFlow/IfdsIde/TypePropagator.cpp | 18 +- unittests/Utils/SCCGenericTest.cpp | 5 +- 11 files changed, 473 insertions(+), 173 deletions(-) delete mode 100644 include/phasar/PhasarLLVM/Utils/AliasSets.h create mode 100644 include/phasar/Utils/BitSet.h create mode 100644 include/phasar/Utils/TypedVector.h diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h 
b/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h index 0a32fd465d..391379b15c 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h @@ -138,8 +138,9 @@ struct TypeAssignmentGraph { assert(size_t(From) < Adj.size()); assert(size_t(To) < Adj.size()); - if (From == To) + if (From == To) { return; + } Adj[size_t(From)].insert(To); } @@ -153,12 +154,6 @@ struct TypeAssignmentGraph { &BaseCG, psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP); -[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP); - void printNode(llvm::raw_ostream &OS, TAGNode TN); }; // namespace psr::analysis::call_graph diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h index 7896c86335..ceab9bcd3c 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h +++ b/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h @@ -30,17 +30,15 @@ struct SCCOrder; struct TypeAssignment { llvm::SmallVector, 0> TypesPerSCC; - LLVM_LIBRARY_VISIBILITY void - print(llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs); + void print(llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs); }; -[[nodiscard]] LLVM_LIBRARY_VISIBILITY TypeAssignment propagateTypes( - const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs, - const SCCCallers - &Deps, - const SCCOrder &Order); +[[nodiscard]] TypeAssignment +propagateTypes(const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs, + const SCCCallers &Deps, + const SCCOrder &Order); } // namespace psr::analysis::call_graph #endif diff --git a/include/phasar/PhasarLLVM/Utils/AliasSets.h b/include/phasar/PhasarLLVM/Utils/AliasSets.h deleted file mode 100644 index dc37c74752..0000000000 --- 
a/include/phasar/PhasarLLVM/Utils/AliasSets.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include "phasar/Utils/StableVector.h" - -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLFunctionalExtras.h" -#include "llvm/IR/Value.h" - -namespace psr::analysis::call_graph { -struct ObjectGraph; - -struct AliasSets { - using AliasSetTy = llvm::SmallDenseSet; - - psr::StableVector AliasSetOwner{}; - llvm::SmallVector AliasSetMap{}; - - void print(llvm::raw_ostream &OS, const ObjectGraph &Graph) const; -}; - -class AliasInfo { -public: - explicit AliasInfo(const ObjectGraph *Graph) noexcept : Graph(Graph) { - assert(Graph != nullptr); - } - - auto aliases() { - return [this](const llvm::Value *Fact, - llvm::function_ref WithAlias) { - return foreachAlias(Fact, WithAlias); - }; - }; - -private: - void foreachAlias(const llvm::Value *Fact, - llvm::function_ref WithAlias); - - const ObjectGraph *Graph{}; -}; - -} // namespace psr::analysis::call_graph diff --git a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h index feb532f6e1..cdcec3c1f9 100644 --- a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h +++ b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h @@ -50,12 +50,12 @@ class FilteredAliasSet { }) {} [[nodiscard]] container_type getAliasSet(d_t Val, n_t At); - [[nodiscard]] container_type getMustAliasSet(d_t Val, n_t At) { + [[nodiscard]] container_type getMustAliasSet(d_t Val, n_t /*At*/) { return {Val}; } void foreachAlias(d_t Fact, n_t At, llvm::function_ref WithAlias); - void foreachMustAlias(d_t Fact, n_t At, + void foreachMustAlias(d_t Fact, n_t /*At*/, llvm::function_ref WithAlias) { WithAlias(Fact); } diff --git a/include/phasar/Utils/BitSet.h b/include/phasar/Utils/BitSet.h new file mode 100644 index 0000000000..a5ae207a84 --- /dev/null +++ b/include/phasar/Utils/BitSet.h @@ -0,0 +1,215 @@ +/****************************************************************************** + * Copyright (c) 2024 
Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_UTILS_BITSET_H +#define PHASAR_UTILS_BITSET_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" + +#include +#include +#include + +namespace psr { +template class BitSet { +public: + class Iterator { + public: + using value_type = IdT; + using reference = IdT; + using pointer = const IdT *; + using difference_type = ptrdiff_t; + using iterator_category = std::forward_iterator_tag; + + Iterator(llvm::SmallBitVector::const_set_bits_iterator It) noexcept + : It(It) {} + + Iterator &operator++() noexcept { + ++It; + return *this; + } + Iterator operator++(int) noexcept { + auto Ret = *this; + ++*this; + return Ret; + } + reference operator*() const noexcept { return IdT(*It); } + + bool operator==(const Iterator &Other) const noexcept { + return It == Other.It; + } + bool operator!=(const Iterator &Other) const noexcept { + return !(*this == Other); + } + + private: + llvm::SmallBitVector::const_set_bits_iterator It; + }; + + using iterator = Iterator; + using value_type = IdT; + + BitSet() noexcept = default; + explicit BitSet(size_t InitialCapacity) : Bits(InitialCapacity) {} + explicit BitSet(size_t InitialCapacity, bool InitialValue) + : Bits(InitialCapacity, InitialValue) {} + + void reserve(size_t Cap) { + if (Bits.size() < Cap) { + Bits.resize(Cap); + } + } + + [[nodiscard]] bool contains(IdT Id) const noexcept { + auto Index = uint32_t(Id); + return Bits.size() > Index && Bits.test(Index); + } + + void insert(IdT Id) { + auto Index = uint32_t(Id); + if (Bits.size() <= Index) { + Bits.resize(Index + 1); + } + + Bits.set(Index); + } + + [[nodiscard]] bool tryInsert(IdT Id) { + auto Index = uint32_t(Id); + if (Bits.size() <= Index) { + Bits.resize(Index + 
1); + } + + bool Ret = !Bits.test(Index); + Bits.set(Index); + return Ret; + } + + void erase(IdT Id) noexcept { + if (Bits.size() > size_t(Id)) { + Bits.reset(uint32_t(Id)); + } + } + [[nodiscard]] bool tryErase(IdT Id) noexcept { + if (contains(Id)) { + return Bits.reset(uint32_t(Id)), true; + } + + return false; + } + + void mergeWith(const BitSet &Other) { Bits |= Other.Bits; } + + bool tryMergeWith(const BitSet &Other) { + /// TODO: Make this more efficient + return *this == Other ? false : (mergeWith(Other), true); + } + + void clear() noexcept { Bits.reset(); } + + [[nodiscard]] friend bool operator==(const BitSet &Lhs, + const BitSet &Rhs) noexcept { + bool LeftEmpty = Lhs.Bits.none(); + bool RightEmpty = Rhs.Bits.none(); + if (LeftEmpty || RightEmpty) { + return LeftEmpty == RightEmpty; + } + // Check, whether Lhs and Rhs actually have the same bits set and not + // whether their internal representation is exactly identitcal + uintptr_t LhsStore{}; + uintptr_t RhsStore{}; + + auto LhsWords = Lhs.Bits.getData(LhsStore); + auto RhsWords = Rhs.Bits.getData(RhsStore); + if (LhsWords.size() == RhsWords.size()) { + return LhsWords == RhsWords; + } + auto MinSize = std::min(LhsWords.size(), RhsWords.size()); + if (LhsWords.slice(0, MinSize) != RhsWords.slice(0, MinSize)) { + return false; + } + auto Rest = (LhsWords.size() > RhsWords.size() ? 
LhsWords : RhsWords) + .slice(MinSize); + return std::all_of(Rest.begin(), Rest.end(), + [](auto Word) { return Word == 0; }); + } + + [[nodiscard]] friend bool operator!=(const BitSet &Lhs, + const BitSet &Rhs) noexcept { + return !(Lhs == Rhs); + } + + [[nodiscard]] bool any() const noexcept { return Bits.any(); } + + [[nodiscard]] iterator begin() const noexcept { + return Bits.set_bits_begin(); + } + [[nodiscard]] iterator end() const noexcept { return Bits.set_bits_end(); } + + void operator|=(const BitSet &Other) { Bits |= Other.Bits; } + void operator-=(const BitSet &Other) { Bits.reset(Other.Bits); } + + [[nodiscard]] BitSet operator-(const BitSet &Other) const { + // TODO: keep allocation small by looping from the end and truncating all + // words that result in all-zero + auto Ret = *this; + Ret -= Other; + return Ret; + } + + BitSet &insertAllOf(const BitSet &Other) { + Bits |= Other.Bits; + return *this; + } + BitSet &eraseAllOf(const BitSet &Other) { + Bits.reset(Other.Bits); + return *this; + } + + [[nodiscard]] bool isSubsetOf(const BitSet &Of) const { + uintptr_t Buf = 0; + uintptr_t OfBuf = 0; + + auto Words = Bits.getData(Buf); + auto OfWords = Of.Bits.getData(OfBuf); + if (Words.size() > OfWords.size()) { + if (llvm::any_of(Words.drop_front(OfWords.size()), + [](uintptr_t W) { return W != 0; })) { + return false; + } + } + + for (auto [W, OfW] : llvm::zip(Words, OfWords)) { + if ((W & ~OfW) != 0) { + return false; + } + } + + return true; + } + + [[nodiscard]] bool isSupersetOf(const BitSet &Of) const { + return Of.isSubsetOf(*this); + } + + // The number of bits available + [[nodiscard]] size_t capacity() const noexcept { return Bits.size(); } + // The number of bits set to 1 + [[nodiscard]] size_t size() const noexcept { return Bits.count(); } + [[nodiscard]] bool empty() const noexcept { return Bits.none(); } + + [[nodiscard]] bool test(uint32_t Ident) { return Bits.test(Ident); } + +private: + llvm::SmallBitVector Bits; +}; +} // namespace 
psr + +#endif diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 12a82d5ac3..7f883b715d 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -11,6 +11,8 @@ #define PHASAR_UTILS_SCCGENERIC_H #include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" +#include "phasar/Utils/BitSet.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" @@ -23,6 +25,8 @@ #include "llvm/Support/raw_ostream.h" #include +#include +#include namespace psr { class LLVMBasedICFG; @@ -34,7 +38,7 @@ enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; // holds the scc's of a given graph template struct SCCHolder { - llvm::SmallVector SCCOfNode{}; + llvm::SmallVector SCCOfNode; llvm::SmallVector> NodesInSCC{}; size_t NumSCCs = 0; }; @@ -42,8 +46,8 @@ template struct SCCHolder { // holds a graph were the scc's are compressed to a single node. Resulting graph // is a DAG template struct SCCCallers { - llvm::SmallVector, 0> ChildrenOfSCC{}; - llvm::SmallVector SCCRoots{}; + llvm::SmallVector, 0> ChildrenOfSCC; + llvm::SmallVector SCCRoots; void print(llvm::raw_ostream &OS, const SCCHolder &SCCs, const G &Graph); @@ -68,13 +72,13 @@ template struct SCCData { }; template struct SCCDataIt { - llvm::SmallVector Disc; - llvm::SmallVector Low; - llvm::SmallBitVector OnStack; + TypedVector Disc; + TypedVector Low; + BitSet OnStack; llvm::SmallVector Stack; llvm::SmallVector> CallStack; uint32_t Time = 0; - llvm::SmallBitVector Seen; + BitSet Seen; explicit SCCDataIt(size_t NumFuns) : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), @@ -138,15 +142,16 @@ static void computeSCCsRec(const G &Graph, typename G::GraphNodeId CurrNode, } } -// Iterative IMplementation for Tarjan's SCC Alg. -// -> Heapoverflow through simulated Stack? +// Iterative Implementation for Tarjan's SCC Alg. 
template -static void tarjanIt(const G &Graph, SCCDataIt &Data, - SCCHolder &Holder) { +static void tarjanSCCIt(const G &Graph, + SCCDataIt &Data, + SCCHolder &Holder) { using GraphNodeId = typename G::GraphNodeId; - auto CurrTime = Data.Time; + uint32_t Unvisited = UINT32_MAX; + auto CurrTime = 0; for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { - if (Data.Disc[Vertex] == UINT32_MAX) { + if (Data.Disc[GraphNodeId(Vertex)] == Unvisited) { Data.CallStack.push_back({GraphNodeId(Vertex), 0}); while (!Data.CallStack.empty()) { auto Curr = Data.CallStack.pop_back_val(); @@ -156,52 +161,54 @@ static void tarjanIt(const G &Graph, SCCDataIt &Data, Data.Low[Curr.first] = CurrTime; CurrTime++; Data.Stack.push_back(Curr.first); - Data.OnStack.set(Curr.first); + Data.OnStack.insert(Curr.first); } // Curr.second > 0 implies that we came back from a recursive call of // node with higher depth if (Curr.second > 0) { - setMin(Data.Low[Curr.first], Data.Low[Curr.second - 1]); + GraphNodeId Pred = Graph.getEdges(Curr.first)[Curr.second - 1]; + setMin(Data.Low[Curr.first], Data.Low[Pred]); } // find the next node for recursion while (Curr.second < Graph.getEdges(Curr.first).size() && Data.Disc[Graph.getEdges(Curr.first)[Curr.second]] != - UINT32_MAX) { + Unvisited) { GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; - if (Data.OnStack.test(W)) { + if (Data.OnStack.test(uint32_t(W))) { setMin(Data.Low[Curr.first], Data.Disc[W]); } Curr.second++; - // If a Node u is undiscovered i.e. Data.Disc[u] = UINT32_MAX - // start a recursive function call - if (Curr.second < Graph.getEdges(Curr.first).size()) { - GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; - Data.CallStack.push_back({Curr.first, Curr.second++}); - Data.CallStack.push_back({U, 0}); - } - // If Curr.first is the root of a connected component i.e. Data.Disc = - // Data.Low i.e. 
cycle found - if (Data.Low[Curr.first] == Data.Disc[Curr.first]) { - //-> SCC found - auto SCCIdx = SCCId(Holder.NumSCCs++); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != Curr.first) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } + } + // If a Node u is undiscovered i.e. Data.Disc[u] = UINT32_MAX + // start a recursive function call + if (Curr.second < Graph.getEdges(Curr.first).size()) { + GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; + Data.CallStack.push_back({Curr.first, (Curr.second++)}); + Data.CallStack.push_back({U, 0}); + continue; + } + // If Curr.first is the root of a connected component i.e. Data.Disc = + // Data.Low i.e. cycle found + if (Data.Low[Curr.first] == Data.Disc[Curr.first]) { + //-> SCC found + auto SCCIdx = SCCId(Holder.NumSCCs++); + auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); + + assert(!Data.Stack.empty()); + while (Data.Stack.back() != Curr.first) { auto Fun = Data.Stack.pop_back_val(); Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); + Data.OnStack.erase(Fun); + Data.Seen.insert(Fun); NodesInSCC.push_back(Fun); } + + auto Fun = Data.Stack.pop_back_val(); + Holder.SCCOfNode[size_t(Fun)] = SCCIdx; + Data.OnStack.erase(Fun); + Data.Seen.insert(Fun); + NodesInSCC.push_back(Fun); } } } @@ -243,12 +250,12 @@ execTarjan(const G &Graph, const bool Iterative) { return Ret; } - SCCData Data(NumNodes); - SCCDataIt DataIt(NumNodes); + SCCData Data(NumNodes); + SCCDataIt DataIt(NumNodes); for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { if (Iterative) { - if (!DataIt.Senn.text(FunId)) { - tarjanIt(Graph, DataIt, Ret); + if (!DataIt.Seen.test(FunId)) { + tarjanSCCIt(Graph, DataIt, Ret); } } else { if (!Data.Seen.test(FunId)) { diff --git 
a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index a47167dca6..38e452ee6f 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -24,7 +24,9 @@ namespace psr { #if __cplusplus < 202002L #define PSR_CONCEPT static constexpr bool -template struct type_identity { using type = T; }; +template struct TypeIdentity { // type_identity + using type = T; +}; #else #define PSR_CONCEPT concept template using type_identity = std::type_identity; @@ -170,7 +172,7 @@ template struct variant_idx, T> : std::integral_constant< size_t, - std::variant...>(type_identity{}).index()> {}; + std::variant...>(TypeIdentity{}).index()> {}; template struct has_llvm_dense_map_info : std::false_type {}; @@ -253,7 +255,7 @@ PSR_CONCEPT IsEqualityComparable = detail::IsEqualityComparable::value; template PSR_CONCEPT AreEqualityComparable = detail::AreEqualityComparable::value; -template using type_identity_t = typename type_identity::type; +template using type_identity_t = typename TypeIdentity::type; template static constexpr size_t variant_idx = detail::variant_idx::value; diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h new file mode 100644 index 0000000000..0f315153a4 --- /dev/null +++ b/include/phasar/Utils/TypedVector.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#ifndef PHASAR_UTILS_TYPEDVECTOR_H +#define PHASAR_UTILS_TYPEDVECTOR_H + +#include "phasar/Utils/ByRef.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +#include +#include +#include +#include +#include + +namespace psr { +template +class TypedVector { +public: + TypedVector() noexcept = default; + TypedVector(std::initializer_list IList) : Vec(IList) {} + TypedVector(size_t Size) : Vec(Size) {} + TypedVector(size_t Size, ValueT Default) : Vec(Size, Default){}; + + template + explicit TypedVector(Iter From, Iter To) + : Vec(std::move(From), std::move(To)) {} + + explicit TypedVector(llvm::ArrayRef Arr) + : Vec(Arr.begin(), Arr.end()) {} + + void reserve(size_t Capa) { Vec.reserve(Capa); } + + void resize(size_t Sz) { Vec.resize(Sz); } + + void resize(size_t Sz, ByConstRef Val) { Vec.resize(Sz, Val); } + + [[nodiscard]] bool empty() const noexcept { return Vec.empty(); } + [[nodiscard]] bool any() const noexcept { return !Vec.empty(); } + [[nodiscard]] size_t size() const noexcept { return Vec.size(); } + + [[nodiscard]] bool inbounds(IdT Id) const noexcept { + return size_t(Id) < size(); + } + + [[nodiscard]] ByConstRef operator[](IdT Id) const & { + assert(size_t(Id) == size()); + return Vec[size_t(Id)]; + } + + [[nodiscard]] ValueT &operator[](IdT Id) & { + assert(size_t(Id) < size()); // was == before + return Vec[size_t(Id)]; + } + + [[nodiscard]] ValueT operator[](IdT Id) && { + assert(size_t(Id) < size()); // was == before + return std::move(Vec[size_t(Id)]); + } + + [[nodiscard]] auto begin() noexcept { return Vec.begin(); } + [[nodiscard]] auto end() noexcept { return Vec.end(); } + + [[nodiscard]] auto begin() const noexcept { return Vec.begin(); } + [[nodiscard]] auto end() const noexcept { return Vec.end(); } + + template ValueT &emplace_back(ArgsT &&...Args) { + return 
Vec.emplace_back(std::forward(Args)...); + } + + void push_back(ByConstRef Val) { Vec.push_back(Val); } + + template + std::enable_if_t> push_back(ValueT &&Val) { + Vec.push_back(std::move(Val)); + } + + [[nodiscard]] bool operator==(const TypedVector &Other) const noexcept { + return Vec == Other.Vec; + } + [[nodiscard]] bool operator!=(const TypedVector &Other) const noexcept { + return !(*this == Other); + } + + [[nodiscard]] llvm::ArrayRef asRef() const &noexcept { return Vec; } + [[nodiscard]] llvm::ArrayRef asRef() &&noexcept = delete; + + [[nodiscard]] llvm::ArrayRef + // NOLINTNEXTLINE(readability-identifier-naming) + drop_front(size_t Offs) const &noexcept { + return asRef().drop_front(Offs); + } + [[nodiscard]] llvm::ArrayRef + drop_front(size_t Offs) &&noexcept = delete; + +private: + llvm::SmallVector Vec{}; +}; +} // namespace psr + +#endif diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp index 5aea14efc4..0924005468 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp @@ -11,7 +11,6 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" -#include "phasar/PhasarLLVM/Utils/AliasSets.h" #include "phasar/PhasarLLVM/Utils/FilteredAliasSet.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" @@ -175,12 +174,14 @@ static void initializeWithFun(const llvm::Function *Fun, // Add all locals // Add return - if (Fun->isDeclaration()) + if (Fun->isDeclaration()) { return; + } for (const auto &Arg : Fun->args()) { - if (!Arg.getType()->isPointerTy()) + if (!Arg.getType()->isPointerTy()) { continue; + } addTAGNode({Variable{&Arg}}, TAG); } @@ -201,17 +202,20 @@ static void initializeWithFun(const llvm::Function *Fun, addTAGNode({Variable{&I}}, TAG); } - if (Fun->getReturnType() && 
Fun->getReturnType()->isPointerTy()) + if (Fun->getReturnType() && Fun->getReturnType()->isPointerTy()) { addTAGNode({Return{Fun}}, TAG); + } } [[nodiscard]] static bool isVTableOrFun(const llvm::Value *Val) { const auto *Base = Val->stripPointerCastsAndAliases(); - if (llvm::isa(Base)) + if (llvm::isa(Base)) { return true; + } - if (const auto *Glob = llvm::dyn_cast(Base)) + if (const auto *Glob = llvm::dyn_cast(Base)) { return Glob->isConstant() && Glob->getName().startswith("_ZTV"); + } return false; } @@ -220,13 +224,15 @@ static void handleAlloca(const llvm::AllocaInst *Alloca, TypeAssignmentGraph &TAG, const psr::LLVMVFTableProvider &VTP) { auto TN = TAG.get({Variable{Alloca}}); - if (!TN) + if (!TN) { return; + } const auto *AllocTy = llvm::dyn_cast(Alloca->getAllocatedType()); - if (!AllocTy) + if (!AllocTy) { return; + } if (const auto *TV = VTP.getVFTableGlobal(AllocTy)) { TAG.TypeEntryPoints[*TN].insert(TV); @@ -250,22 +256,25 @@ static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, static void handleGEP(const llvm::GetElementPtrInst *GEP, TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { auto To = TAG.get({Variable{GEP}}); - if (!To) + if (!To) { return; + } if (!GEP->isInBounds()) { auto From = TAG.get({Variable{GEP->getPointerOperand()}}); - if (From && To) + if (From && To) { TAG.addEdge(*From, *To); + } return; } // TODO: Is this correct? 
-- also check load auto From = getGEPNode(GEP, TAG, DL); - if (From) + if (From) { TAG.addEdge(*From, *To); + } } static bool handleEntryForStore(const llvm::StoreInst *Store, @@ -274,8 +283,9 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, const auto *Base = Store->getValueOperand()->stripPointerCastsAndAliases(); bool IsEntry = isVTableOrFun(Base); - if (!IsEntry) + if (!IsEntry) { return false; + } if (const auto *GEPDest = llvm::dyn_cast(Store->getPointerOperand())) { @@ -286,8 +296,9 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, if (const auto *FldDest = std::get_if(&GEPNode.Label)) { auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); - if (ApproxDest) + if (ApproxDest) { TAG.TypeEntryPoints[*ApproxDest].insert(Base); + } } } } @@ -296,8 +307,9 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, // TODO: Fuse store and GEP! auto DestNodeId = TAG.get({Variable{Dest}}); - if (!DestNodeId) + if (!DestNodeId) { return; + } TAG.TypeEntryPoints[*DestNodeId].insert(Base); }); @@ -307,12 +319,14 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, TAGAliasInfo AI, const llvm::DataLayout &DL) { - if (handleEntryForStore(Store, TAG, AI, DL)) + if (handleEntryForStore(Store, TAG, AI, DL)) { return; + } auto From = TAG.get({Variable{Store->getValueOperand()}}); - if (!From) + if (!From) { return; + } if (const auto *GEPDest = llvm::dyn_cast(Store->getPointerOperand())) { @@ -323,8 +337,9 @@ static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, if (const auto *FldDest = std::get_if(&GEPNode.Label)) { auto ApproxDest = TAG.get({Field{FldDest->Base, SIZE_MAX}}); - if (ApproxDest) + if (ApproxDest) { TAG.addEdge(*From, *ApproxDest); + } } } } @@ -333,8 +348,9 @@ static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, // TODO: Fuse store and GEP! 
auto DestNodeId = TAG.get({Variable{Dest}}); - if (!DestNodeId) + if (!DestNodeId) { return; + } TAG.addEdge(*From, *DestNodeId); }); @@ -343,29 +359,34 @@ static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, static void handleLoad(const llvm::LoadInst *Load, TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { auto To = TAG.get({Variable{Load}}); - if (!To) + if (!To) { return; + } auto From = TAG.get({Variable{Load->getPointerOperand()}}); - if (From) + if (From) { TAG.addEdge(*From, *To); + } if (const auto *GEPDest = llvm::dyn_cast(Load->getPointerOperand())) { - if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) + if (auto GEPNodeId = getGEPNode(GEPDest, TAG, DL)) { TAG.addEdge(*GEPNodeId, *To); + } } } static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { auto To = TAG.get({Variable{Phi}}); - if (!To) + if (!To) { return; + } for (const auto &Inc : Phi->incoming_values()) { auto From = TAG.get({Variable{Inc.get()}}); - if (From) + if (From) { TAG.addEdge(*From, *To); + } } } @@ -373,18 +394,21 @@ static llvm::StringRef extractTypeName(llvm::StringRef CtorName) { // Example: _ZN3OneC2Ev auto EndIdx = CtorName.rfind("C2E"); - if (EndIdx == llvm::StringRef::npos) + if (EndIdx == llvm::StringRef::npos) { EndIdx = CtorName.rfind("C1E"); + } - if (EndIdx == llvm::StringRef::npos) + if (EndIdx == llvm::StringRef::npos) { EndIdx = CtorName.size(); + } auto StartIdx = EndIdx; while (StartIdx) { --StartIdx; - if (llvm::isDigit(CtorName[StartIdx])) + if (llvm::isDigit(CtorName[StartIdx])) { break; + } } return CtorName.slice(StartIdx, EndIdx); } @@ -429,8 +453,9 @@ static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, const llvm::Function *Callee, const psr::LLVMVFTableProvider &VTP) { - if (!psr::isHeapAllocatingFunction(Callee)) + if (!psr::isHeapAllocatingFunction(Callee)) { return; + } if (const auto *MDNode = Call->getMetadata("heapallocsite")) { @@ -499,8 +524,9 @@ static void 
handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, for (const auto &Arg : Call->args()) { auto TN = TAG.get({Variable{Arg.get()}}); Args.push_back(TN); - if (TN) + if (TN) { HasArgNode = true; + } bool IsEntry = isVTableOrFun(Arg.get()); EntryArgs.push_back(IsEntry); @@ -509,16 +535,18 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, auto CSNod = TAG.get({Variable{Call}}); // TODO: Handle struct returns that contain pointers - if (!HasArgNode && !CSNod) + if (!HasArgNode && !CSNod) { return; + } for (const auto *Callee : BaseCG.getCalleesOfCallAt(Call)) { handleEntryForCall(Call, *CSNod, TAG, Callee, VTP); for (const auto &[Param, Arg] : llvm::zip(Callee->args(), Args)) { auto ParamNodId = TAG.get({Variable{&Param}}); - if (!ParamNodId) + if (!ParamNodId) { continue; + } if (EntryArgs.test(Param.getArgNo())) { TAG.TypeEntryPoints[*ParamNodId].insert( @@ -526,19 +554,22 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, ->stripPointerCastsAndAliases()); } - if (!Arg) + if (!Arg) { continue; + } - if (!Param.hasStructRetAttr()) + if (!Param.hasStructRetAttr()) { TAG.addEdge(*Arg, *ParamNodId); + } // if (!Param.hasByValAttr()) // TAG.addEdge(*ParamNodId, *Arg); } if (CSNod) { auto RetNod = TAG.get({Return{Callee}}); - if (RetNod) + if (RetNod) { TAG.addEdge(*RetNod, *CSNod); + } } } } @@ -547,8 +578,9 @@ static void handleReturn(const llvm::ReturnInst *Ret, TypeAssignmentGraph &TAG) { auto TNId = TAG.get({Return{Ret->getFunction()}}); - if (!TNId) + if (!TNId) { return; + } if (const auto *RetVal = Ret->getReturnValue()) { const auto *Base = RetVal->stripPointerCastsAndAliases(); @@ -558,8 +590,9 @@ static void handleReturn(const llvm::ReturnInst *Ret, } auto From = TAG.get({Variable{Base}}); - if (From) + if (From) { TAG.addEdge(*From, *TNId); + } } } @@ -592,8 +625,9 @@ static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, auto From = 
TAG.get({Variable{Cast->getOperand(0)}}); auto To = TAG.get({Variable{Cast}}); - if (From && To) + if (From && To) { TAG.addEdge(*From, *To); + } } if (const auto *Call = llvm::dyn_cast(&I)) { handleCall(Call, TAG, BaseCG, VTP); @@ -658,22 +692,6 @@ auto analysis::call_graph::computeTypeAssignmentGraph( VTP); } -auto analysis::call_graph::computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - const ObjectGraph &ObjGraph, const psr::LLVMVFTableProvider &VTP) - -> TypeAssignmentGraph { - AliasInfo AI(&ObjGraph); - FilteredAliasSet FAS(AI.aliases()); - return computeTypeAssignmentGraphImpl( - Mod, BaseCG, - [&FAS](const auto *Fact, const auto *At, TAGAliasHandler Handler) { - FAS.foreachAlias(Fact, At, Handler); - }, - VTP); -} - void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { OS << "digraph TAG {\n"; psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp index 4fe8f959aa..b9f8afdf81 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp @@ -19,18 +19,16 @@ using namespace psr::analysis::call_graph; static void initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { + const SCCHolder &SCCs) { for (const auto &[Node, Types] : TAG.TypeEntryPoints) { auto SCC = SCCs.SCCOfNode[size_t(Node)]; TA.TypesPerSCC[size_t(SCC)].insert(Types.begin(), Types.end()); } } -static void propagate( - TypeAssignment &TA, - const SCCCallers - &Deps, - SCCId CurrSCC) { +static void propagate(TypeAssignment &TA, + const SCCCallers &Deps, + SCCId CurrSCC) { const auto &Types = TA.TypesPerSCC[size_t(CurrSCC)]; if (Types.empty()) { return; @@ -43,10 +41,8 @@ static void propagate( TypeAssignment analysis::call_graph::propagateTypes( const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs, - const SCCCallers - &Deps, - const SCCOrder &Order) 
{ + const SCCHolder &SCCs, + const SCCCallers &Deps, const SCCOrder &Order) { TypeAssignment Ret; Ret.TypesPerSCC.resize(SCCs.NumSCCs); @@ -60,7 +56,7 @@ TypeAssignment analysis::call_graph::propagateTypes( void TypeAssignment::print( llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { + const SCCHolder &SCCs) { OS << "digraph TypeAssignment {\n"; psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 1f59fc55bc..4c47982ef1 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -29,8 +29,9 @@ class ExampleGraph { ExampleGraph() = default; - [[nodiscard]] std::vector getEdges(const GraphNodeId ID) const { - return Adj[uint32_t(ID)]; + [[nodiscard]] std::vector + getEdges(const GraphNodeId Vertex) const { + return Adj[uint32_t(Vertex)]; } std::vector> Adj; }; From 88adccb7263e6d2ee8d39b078dea6988382eda2e Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Sun, 8 Sep 2024 13:14:52 +0200 Subject: [PATCH 08/27] SCCGeneric.h corrections --- include/phasar/Utils/SCCGeneric.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 7f883b715d..d5b3574642 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -151,6 +151,7 @@ static void tarjanSCCIt(const G &Graph, uint32_t Unvisited = UINT32_MAX; auto CurrTime = 0; for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { + // std::cout << std::to_string(Vertex) << "\n"; if (Data.Disc[GraphNodeId(Vertex)] == Unvisited) { Data.CallStack.push_back({GraphNodeId(Vertex), 0}); while (!Data.CallStack.empty()) { @@ -183,7 +184,7 @@ static void tarjanSCCIt(const G &Graph, // start a recursive function call if (Curr.second < Graph.getEdges(Curr.first).size()) { GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; - 
Data.CallStack.push_back({Curr.first, (Curr.second++)}); + Data.CallStack.push_back({Curr.first, Curr.second + 1}); Data.CallStack.push_back({U, 0}); continue; } From 8ea087d8cac057f0a847e6f38e6e38facfda8fbc Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Mon, 16 Sep 2024 15:06:49 +0200 Subject: [PATCH 09/27] SCC Tests added and VTACallGraphTest ported --- include/phasar/AnalysisConfig.h | 29 +++ .../PhasarLLVM/ControlFlow/call_graph.h | 42 ++++ include/phasar/Utils/TypedVector.h | 6 +- unittests/Utils/CMakeLists.txt | 1 + unittests/Utils/SCCGenericTest.cpp | 71 +++++- unittests/Utils/VTACallGraphTest.cpp | 218 ++++++++++++++++++ 6 files changed, 354 insertions(+), 13 deletions(-) create mode 100644 include/phasar/AnalysisConfig.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/call_graph.h create mode 100644 unittests/Utils/VTACallGraphTest.cpp diff --git a/include/phasar/AnalysisConfig.h b/include/phasar/AnalysisConfig.h new file mode 100644 index 0000000000..f3f22ee796 --- /dev/null +++ b/include/phasar/AnalysisConfig.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_ANALYSISCONFIG_H +#define PHASAR_ANALYSISCONFIG_H + +#include "nlohmann/json.hpp" + +#include +#include + +namespace t2 { +struct AnalysisConfig { + std::string OutputFile; + bool TreatWarningsAsError = false; + + std::optional PrecomputedCG; + std::optional PrecomputedAA; + /// TODO: More config options +}; +} // namespace t2 + +#endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/call_graph.h b/include/phasar/PhasarLLVM/ControlFlow/call_graph.h new file mode 100644 index 0000000000..090d0bf75b --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/call_graph.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_CALL_GRAPH_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_CALL_GRAPH_H + +#include "phasar/ControlFlow/CallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" + +#include "llvm/IR/Module.h" + +namespace psr { + +[[nodiscard]] psr::CallGraph +computeVTACallgraph(const llvm::Module &Mod, + const psr::CallGraph &BaseCG, + psr::LLVMAliasInfoRef AS, + const psr::LLVMVFTableProvider &VTP); + +namespace analysis::call_graph { +struct ObjectGraph; +} // namespace analysis::call_graph + +[[nodiscard]] psr::CallGraph +computeVTACallgraph(const llvm::Module &Mod, + const psr::CallGraph &BaseCG, + const analysis::call_graph::ObjectGraph &ObjGraph, + const psr::LLVMVFTableProvider &VTP); + +} // 
namespace psr + +#endif diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h index 0f315153a4..7ab8dacb00 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -52,17 +52,17 @@ class TypedVector { } [[nodiscard]] ByConstRef operator[](IdT Id) const & { - assert(size_t(Id) == size()); + assert(inbounds(Id)); return Vec[size_t(Id)]; } [[nodiscard]] ValueT &operator[](IdT Id) & { - assert(size_t(Id) < size()); // was == before + assert(inbounds(Id)); return Vec[size_t(Id)]; } [[nodiscard]] ValueT operator[](IdT Id) && { - assert(size_t(Id) < size()); // was == before + assert(inbounds(Id)); return std::move(Vec[size_t(Id)]); } diff --git a/unittests/Utils/CMakeLists.txt b/unittests/Utils/CMakeLists.txt index d84306e783..06c6717dca 100644 --- a/unittests/Utils/CMakeLists.txt +++ b/unittests/Utils/CMakeLists.txt @@ -11,6 +11,7 @@ set(UtilsSources OnTheFlyAnalysisPrinterTest.cpp SourceMgrPrinterTest.cpp SCCGenericTest.cpp + VTACallGraphTest.cpp ) if(PHASAR_ENABLE_DYNAMIC_LOG) diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 4c47982ef1..0f4f345962 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -36,18 +36,69 @@ class ExampleGraph { std::vector> Adj; }; +static void computeSCCsAndCompare(ExampleGraph &Graph) { + auto OutputRec = analysis::call_graph::execTarjan(Graph, false); + auto OutputIt = analysis::call_graph::execTarjan(Graph, true); + ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) + << "Iterative Approach did not reach all nodes\n"; + ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) + << "Recursive Approach did not reach all nodes\n"; + EXPECT_EQ(OutputRec.NumSCCs, OutputIt.NumSCCs) + << "Unequal number of SCC components\n"; + /*std::cout << std::to_string(OutputRec.NumSCCs) << " " + << std::to_string(OutputIt.NumSCCs) << "\n";*/ + for (size_t ID = 0; ID < Graph.Adj.size(); ID++) { + 
EXPECT_EQ(OutputRec.SCCOfNode[ID], OutputIt.SCCOfNode[ID]) + << "SCCs differ at Index: " << std::to_string(ID) << "\n"; + } +} + TEST(SCCGenericTest, SCCTest) { using GraphNodeId = ExampleGraph::GraphNodeId; - ExampleGraph Graph{{{GraphNodeId(2)}, - {GraphNodeId(0)}, - {GraphNodeId(1)}, - {GraphNodeId(1), GraphNodeId(2)}, - {GraphNodeId(1)}, - {GraphNodeId(4), GraphNodeId(6)}, - {GraphNodeId(4), GraphNodeId(7)}, - {GraphNodeId(5)}}}; + ExampleGraph GraphOne{{{GraphNodeId(2)}, + {GraphNodeId(0)}, + {GraphNodeId(1)}, + {GraphNodeId(1), GraphNodeId(2)}, + {GraphNodeId(1)}, + {GraphNodeId(4), GraphNodeId(6)}, + {GraphNodeId(4), GraphNodeId(7)}, + {GraphNodeId(5)}}}; - auto OutputRec = analysis::call_graph::execTarjan(Graph, false); + ExampleGraph GraphTwo{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}; + + ExampleGraph GraphThree{{{GraphNodeId(1)}, + {GraphNodeId(2)}, + {GraphNodeId(3)}, + {GraphNodeId(4)}, + {GraphNodeId(5)}, + {GraphNodeId(6)}, + {GraphNodeId(0)}}}; + + ExampleGraph GraphFour{ + {{GraphNodeId(1), GraphNodeId(2), GraphNodeId(3), GraphNodeId(4)}, + {GraphNodeId(0), GraphNodeId(2), GraphNodeId(3), GraphNodeId(4)}, + {GraphNodeId(0), GraphNodeId(1), GraphNodeId(3), GraphNodeId(4)}, + {GraphNodeId(0), GraphNodeId(1), GraphNodeId(2), GraphNodeId(4)}, + {GraphNodeId(0), GraphNodeId(1), GraphNodeId(2), GraphNodeId(3)}}}; + + ExampleGraph GraphFive{{{GraphNodeId(1)}, + {GraphNodeId(2)}, + {GraphNodeId(3), GraphNodeId(4)}, + {GraphNodeId(5)}, + {GraphNodeId(5)}, + {GraphNodeId(2), GraphNodeId(6)}, + {GraphNodeId(7)}, + {GraphNodeId(1), GraphNodeId(8)}, + {}}}; + + std::vector TestGraphs = {GraphOne, GraphTwo, GraphThree, + GraphFour, GraphFive}; + + for (size_t Index = 0; Index < TestGraphs.size(); Index++) { + computeSCCsAndCompare(TestGraphs[Index]); + } + + /*auto OutputRec = analysis::call_graph::execTarjan(Graph, false); auto OutputIt = analysis::call_graph::execTarjan(Graph, true); ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) << "Iterative Approach 
did not reach all nodes\n"; @@ -58,7 +109,7 @@ TEST(SCCGenericTest, SCCTest) { for (size_t ID = 0; ID < Graph.Adj.size(); ID++) { EXPECT_EQ(OutputRec.SCCOfNode[ID], OutputIt.SCCOfNode[ID]) << "SCCs differ at Index: " << std::to_string(ID) << "\n"; - } + }*/ } // main function for the test case diff --git a/unittests/Utils/VTACallGraphTest.cpp b/unittests/Utils/VTACallGraphTest.cpp new file mode 100644 index 0000000000..c4700a87ee --- /dev/null +++ b/unittests/Utils/VTACallGraphTest.cpp @@ -0,0 +1,218 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/AnalysisConfig.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/call_graph.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +#include + +namespace { +[[nodiscard]] std::string printStringSet(const std::set &Set) { + std::string Ret; + llvm::raw_string_ostream OS(Ret); + llvm::interleaveComma(Set, OS << "{ "); + OS << " }"; + return Ret; +} +///////////////////////////// +psr::LLVMBasedICFG createBaseCG(psr::LLVMProjectIRDB &IRDB, + const psr::LLVMVFTableProvider &VTP, + const psr::LLVMTypeHierarchy &TH, + psr::LLVMAliasInfoRef /*PT*/) { + psr::RTAResolver Res(&IRDB, &VTP, &TH); + + std::vector 
EntryPoints; + /////////////////////////////////// + if (IRDB.getFunctionDefinition("main")) { + EntryPoints.emplace_back("main"); + } else { + for (const auto *F : IRDB.getAllFunctions()) { + if (!F->isDeclaration() && F->hasExternalLinkage()) { + EntryPoints.emplace_back(F->getName()); + } + } + } + /////////////////////////////////// + return psr::LLVMBasedICFG(&IRDB, Res, EntryPoints, psr::Soundness::Soundy); +} +////////////////////////////// +class VTACallGraphTest : public ::testing::Test { +protected: + static constexpr llvm::StringLiteral PathToLLFiles = + "/build/phasar/test/llvm_test_code/"; + + struct GroundTruthEntry { + size_t CSId; + std::set Callees; + }; + + void doAnalysisAndCompareResults(const llvm::Twine &IRFile, + llvm::ArrayRef GT) { + ASSERT_FALSE(GT.empty()) << "No Ground-Truth provided!"; + + auto IRDB = std::make_unique(PathToLLFiles + IRFile); + ASSERT_TRUE(IRDB->isValid()); + + psr::LLVMVFTableProvider VTP(*IRDB); + psr::LLVMTypeHierarchy TH(*IRDB); + psr::LLVMAliasSet AS(IRDB.get()); + // implement function locally + auto BaseCG = createBaseCG(*IRDB, VTP, TH, &AS); + + auto CG = psr::computeVTACallgraph(*IRDB->getModule(), + BaseCG.getCallGraph(), &AS, VTP); + + for (const auto &Entry : GT) { + const auto *CS = IRDB->getInstruction(Entry.CSId); + ASSERT_NE(nullptr, CS); + ASSERT_TRUE(llvm::isa(CS)); + auto &&Callees = CG.getCalleesOfCallAt(CS); + + EXPECT_EQ(Entry.Callees.size(), Callees.size()); + + auto GTCallees = Entry.Callees; + for (const auto *Callee : Callees) { + auto CalleeName = Callee->getName(); + EXPECT_TRUE(Entry.Callees.count(CalleeName)) + << "Did not expect function '" << CalleeName.str() + << "' being called at " << psr::llvmIRToString(CS); + GTCallees.erase(CalleeName); + } + + EXPECT_TRUE(GTCallees.empty()) + << "Expected callees not found at " << psr::llvmIRToString(CS) << ": " + << printStringSet(GTCallees); + } + } +}; + +TEST_F(VTACallGraphTest, VirtualCallSite_InterProcCallSite) { + 
doAnalysisAndCompareResults("virtual_callsites/interproc_callsite_cpp.ll", + { + {16, {"_ZN7Derived3barEv"}}, + }); +} + +TEST_F(VTACallGraphTest, UninitializedVariables_VirtualCall) { + doAnalysisAndCompareResults("uninitialized_variables/virtual_call_cpp_dbg.ll", + { + {34, {"_Z3barRi", "_Z3fooRi"}}, + }); +} + +TEST_F(VTACallGraphTest, PathTracing_Inter12) { + // Note: The VTA analysis is not flow-sensitive + doAnalysisAndCompareResults( + "path_tracing/inter_12_cpp_dbg.ll", + { + {33, {"_ZN3TwoD0Ev", "_ZN5ThreeD0Ev"}}, + {45, {"_ZN5Three11assignValueEi", "_ZN3Two11assignValueEi"}}, + }); +} + +TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer1) { + doAnalysisAndCompareResults("call_graphs/function_pointer_1_c.ll", + { + {5, {"bar"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer2) { + doAnalysisAndCompareResults("call_graphs/function_pointer_2_cpp.ll", + { + {8, {"_Z3barv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer3) { + // Note: Although bar is assigned (and part of the TAG), it does not qualify + // as psr::isConsistentCall() + doAnalysisAndCompareResults("call_graphs/function_pointer_3_cpp.ll", + { + {11, {/*"_Z3bari",*/ "_Z3foov"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall2) { + doAnalysisAndCompareResults("call_graphs/virtual_call_2_cpp.ll", + { + {20, {"_ZN1B3fooEv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall3) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + doAnalysisAndCompareResults("call_graphs/virtual_call_3_cpp_dbg.ll", + { + {22, {"_ZN5AImpl3fooEv"}}, + {30, {"_ZN5AImplD0Ev"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall4) { + doAnalysisAndCompareResults("call_graphs/virtual_call_4_cpp.ll", + { + {20, {"_ZN1B3fooEv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall5) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + doAnalysisAndCompareResults("call_graphs/virtual_call_5_cpp_dbg.ll", + { + {24, 
{"_ZN1B5VfuncEv"}}, + {32, {"_ZN1BD0Ev"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall7) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + doAnalysisAndCompareResults("call_graphs/virtual_call_7_cpp_dbg.ll", + { + {28, {"_ZN1A5VfuncEv"}}, + {34, {"_ZN1B5VfuncEv"}}, + {42, {"_ZN1AD0Ev"}}, + }); +} +TEST_F(VTACallGraphTest, DISABLED_CallGraphs_VirtualCall8) { + + // Use the dbg version, because VTA relies on !heapallocsite metadata + // Note: The VTA analysis is neither flow-, nor context-sensitive + doAnalysisAndCompareResults( + "call_graphs/virtual_call_8_cpp_dbg.ll", + { + {26, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + {32, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + }); +} +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall9) { + // Use the dbg version, because VTA relies on !heapallocsite metadata + // Note: The VTA analysis is neither flow-, nor context-sensitive + doAnalysisAndCompareResults( + "call_graphs/virtual_call_9_cpp_dbg.ll", + { + {85, {"_ZN1B3fooEv", "_ZN1C3fooEv", "_ZN1D3fooEv"}}, + {93, {"_ZN1BD0Ev", "_ZN1CD0Ev", "_ZN1DD0Ev"}}, + }); +} +// TODO: More tests! 
+ +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} From 77065135b5f7232cab1cfa74d0d599bbae4f8d4d Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Mon, 16 Sep 2024 15:07:54 +0200 Subject: [PATCH 10/27] AnalysisConfig.h modified --- include/phasar/AnalysisConfig.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/phasar/AnalysisConfig.h b/include/phasar/AnalysisConfig.h index f3f22ee796..30aa18fab9 100644 --- a/include/phasar/AnalysisConfig.h +++ b/include/phasar/AnalysisConfig.h @@ -15,7 +15,7 @@ #include #include -namespace t2 { +namespace psr { struct AnalysisConfig { std::string OutputFile; bool TreatWarningsAsError = false; @@ -24,6 +24,6 @@ struct AnalysisConfig { std::optional PrecomputedAA; /// TODO: More config options }; -} // namespace t2 +} // namespace psr #endif From 4d1d4d20dd25e2accd4912d2ed3efd83d44ea4e5 Mon Sep 17 00:00:00 2001 From: bulletSpace Date: Fri, 11 Oct 2024 13:52:04 +0200 Subject: [PATCH 11/27] new test case in SCCGenericTest.cpp --- unittests/Utils/SCCGenericTest.cpp | 22 ++++++++++++++++++++-- unittests/Utils/VTACallGraphTest.cpp | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 0f4f345962..6b51b99607 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -91,8 +91,26 @@ TEST(SCCGenericTest, SCCTest) { {GraphNodeId(1), GraphNodeId(8)}, {}}}; - std::vector TestGraphs = {GraphOne, GraphTwo, GraphThree, - GraphFour, GraphFive}; + ExampleGraph GraphSix{{{GraphNodeId(1)}, + {GraphNodeId(2)}, + {GraphNodeId(3)}, + {GraphNodeId(4)}, + {GraphNodeId(5)}, + {GraphNodeId(6)}, + {GraphNodeId(7)}, + {GraphNodeId(0)}, + {GraphNodeId(9)}, + {GraphNodeId(10)}, + {GraphNodeId(11)}, + {GraphNodeId(12)}, + {GraphNodeId(13), GraphNodeId(4)}, + {GraphNodeId(8)}, + {GraphNodeId(9)}, + {GraphNodeId(3)}, + {GraphNodeId(5)}}}; 
+ + std::vector TestGraphs = {GraphOne, GraphTwo, GraphThree, + GraphFour, GraphFive, GraphSix}; for (size_t Index = 0; Index < TestGraphs.size(); Index++) { computeSCCsAndCompare(TestGraphs[Index]); diff --git a/unittests/Utils/VTACallGraphTest.cpp b/unittests/Utils/VTACallGraphTest.cpp index c4700a87ee..db03840435 100644 --- a/unittests/Utils/VTACallGraphTest.cpp +++ b/unittests/Utils/VTACallGraphTest.cpp @@ -7,7 +7,7 @@ * Fabian Schiebel and others *****************************************************************************/ -#include "phasar/AnalysisConfig.h" +// #include "phasar/AnalysisConfig.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/call_graph.h" From 17e5b7dd7f6f3370b0a4d0bd93cf54eb65295c58 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 2 Sep 2025 12:35:29 +0200 Subject: [PATCH 12/27] Update TAG, SCCs, and AdjacencyList to use TypedVector --- .../{ => VTA}/TypeAssignmentGraph.h | 38 +- .../ControlFlow/{ => VTA}/TypePropagator.h | 30 +- .../ControlFlow/{ => VTA}/call_graph.h | 0 .../PhasarLLVM/Utils/FilteredAliasSet.h | 68 --- include/phasar/Utils/AdjacencyList.h | 110 ++-- include/phasar/Utils/GraphTraits.h | 144 +++-- include/phasar/Utils/IotaIterator.h | 29 +- include/phasar/Utils/SCCGeneric.h | 515 +++++++++++------- include/phasar/Utils/TypeTraits.h | 20 + include/phasar/Utils/TypedVector.h | 24 +- .../VTA}/TypeAssignmentGraph.cpp | 102 +--- .../ControlFlow/VTA/TypePropagator.cpp | 87 +++ .../DataFlow/IfdsIde/TypePropagator.cpp | 88 --- lib/PhasarLLVM/Utils/FilteredAliasSet.cpp | 121 ---- unittests/Utils/SCCGenericTest.cpp | 135 +++-- unittests/Utils/VTACallGraphTest.cpp | 2 +- 16 files changed, 714 insertions(+), 799 deletions(-) rename include/phasar/PhasarLLVM/ControlFlow/{ => VTA}/TypeAssignmentGraph.h (81%) rename include/phasar/PhasarLLVM/ControlFlow/{ => VTA}/TypePropagator.h (57%) rename 
include/phasar/PhasarLLVM/ControlFlow/{ => VTA}/call_graph.h (100%) delete mode 100644 include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h rename lib/PhasarLLVM/{DataFlow/IfdsIde => ControlFlow/VTA}/TypeAssignmentGraph.cpp (85%) create mode 100644 lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp delete mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp delete mode 100644 lib/PhasarLLVM/Utils/FilteredAliasSet.cpp diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h similarity index 81% rename from include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h rename to include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index 391379b15c..f4426a8883 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -22,21 +22,15 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/HashBuilder.h" #include "llvm/Support/raw_ostream.h" #include #include -namespace psr { -class FilteredAliasSet; -} // namespace psr +namespace psr::vta { -namespace psr::analysis::call_graph { - -enum class [[clang::enum_extensibility(open)]] TAGNodeId : uint32_t{}; +enum class TAGNodeId : uint32_t {}; struct Variable { const llvm::Value *Val; @@ -67,14 +61,14 @@ constexpr bool operator==(Return L, Return R) noexcept { constexpr bool operator==(TAGNode L, TAGNode R) noexcept { return L.Label == R.Label; } -}; // namespace psr::analysis::call_graph +}; // namespace psr::vta namespace llvm { -template <> struct DenseMapInfo { - using TAGNode = psr::analysis::call_graph::TAGNode; - using Variable = psr::analysis::call_graph::Variable; - using Field = psr::analysis::call_graph::Field; - using Return = psr::analysis::call_graph::Return; +template <> struct DenseMapInfo { + 
using TAGNode = psr::vta::TAGNode; + using Variable = psr::vta::Variable; + using Field = psr::vta::Field; + using Return = psr::vta::Return; inline static TAGNode getEmptyKey() noexcept { return {Variable{llvm::DenseMapInfo::getEmptyKey()}}; @@ -98,8 +92,8 @@ template <> struct DenseMapInfo { } }; -template <> struct DenseMapInfo { - using GraphNodeId = psr::analysis::call_graph::TAGNodeId; +template <> struct DenseMapInfo { + using GraphNodeId = psr::vta::TAGNodeId; inline static GraphNodeId getEmptyKey() noexcept { return GraphNodeId(-1); } inline static GraphNodeId getTombstoneKey() noexcept { return GraphNodeId(-2); @@ -114,8 +108,7 @@ template <> struct DenseMapInfo { } // namespace llvm -namespace psr::analysis::call_graph { -struct ObjectGraph; +namespace psr::vta { struct TypeAssignmentGraph { using GraphNodeId = TAGNodeId; @@ -148,13 +141,18 @@ struct TypeAssignmentGraph { void print(llvm::raw_ostream &OS); }; +using AliasHandlerTy = llvm::function_ref; +using AliasInfoTy = llvm::function_ref; + +// TODO: Use AliasIterator here, once available [[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( const llvm::Module &Mod, const psr::CallGraph &BaseCG, - psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP); + AliasInfoTy AS, const psr::LLVMVFTableProvider &VTP); void printNode(llvm::raw_ostream &OS, TAGNode TN); -}; // namespace psr::analysis::call_graph +}; // namespace psr::vta #endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h similarity index 57% rename from include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h rename to include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h index ceab9bcd3c..680e8f238d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/TypePropagator.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h @@ -10,35 +10,39 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_TYPEPROPAGATOR_H #define 
PHASAR_PHASARLLVM_CONTROLFLOW_TYPEPROPAGATOR_H -#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" namespace llvm { class Value; } // namespace llvm -namespace psr::analysis::call_graph { -struct TypeAssignmentGraph; +namespace psr { +template struct SCCId; template struct SCCHolder; -template struct SCCCallers; -struct SCCOrder; +template struct SCCDependencyGraph; +template struct SCCOrder; +} // namespace psr + +namespace psr::vta { +struct TypeAssignmentGraph; +enum class TAGNodeId : uint32_t; struct TypeAssignment { - llvm::SmallVector, 0> TypesPerSCC; + TypedVector, llvm::SmallDenseSet> + TypesPerSCC; void print(llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, const SCCHolder &SCCs); }; [[nodiscard]] TypeAssignment -propagateTypes(const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs, - const SCCCallers &Deps, - const SCCOrder &Order); +propagateTypes(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, + const SCCDependencyGraph &Deps, + const SCCOrder &Order); -} // namespace psr::analysis::call_graph +} // namespace psr::vta #endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/call_graph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h similarity index 100% rename from include/phasar/PhasarLLVM/ControlFlow/call_graph.h rename to include/phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h diff --git a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h b/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h deleted file mode 100644 index cdcec3c1f9..0000000000 --- a/include/phasar/PhasarLLVM/Utils/FilteredAliasSet.h +++ /dev/null @@ -1,68 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. 
- * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#ifndef PHASAR_PHASARLLVM_UTILS_FILTEREDALIASSET_H -#define PHASAR_PHASARLLVM_UTILS_FILTEREDALIASSET_H - -#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" - -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FunctionExtras.h" -#include "llvm/ADT/STLFunctionalExtras.h" - -#include - -namespace llvm { -class Value; -class Instruction; -} // namespace llvm - -namespace psr { - -class FilteredAliasSet { -public: - using d_t = const llvm::Value *; - using n_t = const llvm::Instruction *; - using container_type = std::set; - - using alias_handler_t = llvm::function_ref; - using alias_info_ref_t = - llvm::function_ref; - using alias_info_t = - llvm::unique_function; - - FilteredAliasSet(alias_info_t &&PT) noexcept : PT(std::move(PT)) { - assert(this->PT); - } - - explicit FilteredAliasSet(psr::LLVMAliasInfoRef AS) - : PT([AS](const llvm::Value *Fact, auto Handler) { - for (const auto *Alias : *AS.getAliasSet(Fact)) { - Handler(Alias); - } - }) {} - - [[nodiscard]] container_type getAliasSet(d_t Val, n_t At); - [[nodiscard]] container_type getMustAliasSet(d_t Val, n_t /*At*/) { - return {Val}; - } - - void foreachAlias(d_t Fact, n_t At, llvm::function_ref WithAlias); - void foreachMustAlias(d_t Fact, n_t /*At*/, - llvm::function_ref WithAlias) { - WithAlias(Fact); - } - -private: - alias_info_t PT; -}; -} // namespace psr - -#endif diff --git a/include/phasar/Utils/AdjacencyList.h b/include/phasar/Utils/AdjacencyList.h index b69abe74cf..fbae8d348d 100644 --- a/include/phasar/Utils/AdjacencyList.h +++ b/include/phasar/Utils/AdjacencyList.h @@ -10,13 +10,13 @@ #ifndef PHASAR_UTILS_ADJACENCYLIST_H #define PHASAR_UTILS_ADJACENCYLIST_H +#include 
"phasar/Utils/EmptyBaseOptimizationUtils.h" #include "phasar/Utils/GraphTraits.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/RepeatIterator.h" -#include "phasar/Utils/Utilities.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include @@ -25,23 +25,25 @@ namespace psr { -template struct AdjacencyList { - llvm::SmallVector Nodes{}; - llvm::SmallVector, 0> Adj{}; - llvm::SmallVector Roots{}; +template +struct AdjacencyList { + TypedVector Nodes{}; + TypedVector, 0> Adj{}; + llvm::SmallVector Roots{}; }; -template struct AdjacencyList { - llvm::SmallVector, 0> Adj{}; - llvm::SmallVector Roots{}; +template +struct AdjacencyList { + TypedVector, 0> Adj{}; + llvm::SmallVector Roots{}; }; /// A simple graph implementation based on an adjacency list -template -struct GraphTraits> { - using graph_type = AdjacencyList; +template +struct GraphTraits> { + using graph_type = AdjacencyList; using value_type = T; - using vertex_t = unsigned; + using vertex_t = VtxId; using edge_t = EdgeTy; using edge_iterator = typename llvm::ArrayRef::const_iterator; using roots_iterator = typename llvm::ArrayRef::const_iterator; @@ -53,8 +55,8 @@ struct GraphTraits> { /// Adds a new node to the graph G with node-tag Val /// /// \returns The vertex-descriptor for the newly created node - template >> + template >> static vertex_t addNode(graph_type &G, TT &&Val) { assert(G.Adj.size() == G.Nodes.size()); @@ -68,8 +70,8 @@ struct GraphTraits> { /// /// \returns The vertex-descriptor for the newly created node template >> - static vertex_t addNode(graph_type &G, llvm::NoneType /*Val*/ = llvm::None) { + typename = std::enable_if_t>> + static vertex_t addNode(graph_type &G, value_type /*Val*/ = {}) { auto Ret = G.Adj.size(); G.Adj.emplace_back(); return Ret; @@ -78,8 +80,8 @@ struct GraphTraits> { /// Makes the node Vtx as root in the graph G. 
A node should not be registered /// as root multiple times static void addRoot(graph_type &G, vertex_t Vtx) { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } G.Roots.push_back(Vtx); @@ -87,7 +89,7 @@ struct GraphTraits> { /// Gets a range of all root nodes of graph G static llvm::ArrayRef roots(const graph_type &G) noexcept { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Roots; @@ -98,8 +100,8 @@ struct GraphTraits> { /// deduplicated automatically; to manually deduplicate the edges of one /// source-node, call dedupOutEdges() static void addEdge(graph_type &G, vertex_t From, edge_t To) { - assert(From < G.Adj.size()); - if constexpr (!std::is_same_v) { + assert(G.Adj.inbounds(From)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } G.Adj[From].push_back(std::move(To)); @@ -108,8 +110,8 @@ struct GraphTraits> { /// Gets a range of all edges outgoing from node Vtx in graph G static llvm::ArrayRef outEdges(const graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Adj[Vtx]; @@ -117,8 +119,8 @@ struct GraphTraits> { /// Gets the number of edges outgoing from node Vtx in graph G static size_t outDegree(const graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Adj[Vtx].size(); @@ -127,8 +129,8 @@ struct GraphTraits> { /// Deduplicates the edges outgoing from node Vtx in graph G. 
Deduplication is /// based on operator< and operator== of the edge_t type static void dedupOutEdges(graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } auto &OutEdges = G.Adj[Vtx]; @@ -139,62 +141,62 @@ struct GraphTraits> { /// Gets a const range of all nodes in graph G template >> + typename = std::enable_if_t>> static llvm::ArrayRef nodes(const graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a mutable range of all nodes in graph G template >> + typename = std::enable_if_t>> static llvm::MutableArrayRef nodes(graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a range of all nodes in graph G template >> + typename = std::enable_if_t>> static RepeatRangeType nodes(const graph_type &G) noexcept { - return repeat(llvm::None, G.Adj.size()); + return repeat(value_type{}, G.Adj.size()); } /// Gets a range of vertex-descriptors for all nodes in graph G static auto vertices(const graph_type &G) noexcept { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } - return psr::iota(vertex_t(0), G.Adj.size()); + return psr::iota(G.Adj.size()); } /// Gets the node-tag for node Vtx in graph G. Vtx must be part of G template >> + typename = std::enable_if_t>> static const value_type &node(const graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Nodes.size()); + assert(G.Adj.inbounds(Vtx)); assert(G.Adj.size() == G.Nodes.size()); return G.Nodes[Vtx]; } /// Gets the node-tag for node Vtx in graph G. 
Vtx must be part of G template >> + typename = std::enable_if_t>> static value_type &node(graph_type &G, vertex_t Vtx) noexcept { - assert(Vtx < G.Nodes.size()); + assert(G.Adj.inbounds(Vtx)); assert(G.Adj.size() == G.Nodes.size()); return G.Nodes[Vtx]; } /// Gets the node-tag for node Vtx in graph G. Vtx must be part of G template >> - static llvm::NoneType node([[maybe_unused]] const graph_type &G, - [[maybe_unused]] vertex_t Vtx) noexcept { - assert(Vtx < G.Adj.size()); - return llvm::None; + typename = std::enable_if_t>> + static value_type node([[maybe_unused]] const graph_type &G, + [[maybe_unused]] vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + return {}; } /// Gets the number of nodes in graph G static size_t size(const graph_type &G) noexcept { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Adj.size(); @@ -202,7 +204,7 @@ struct GraphTraits> { /// Gets the number of nodes in graph G that are marked as root static size_t roots_size(const graph_type &G) noexcept { // NOLINT - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } return G.Roots.size(); @@ -210,7 +212,7 @@ struct GraphTraits> { /// Pre-allocates space to hold up to Capacity nodes static void reserve(graph_type &G, size_t Capacity) { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); G.Nodes.reserve(Capacity); } @@ -225,7 +227,7 @@ struct GraphTraits> { static bool pop(graph_type &G, vertex_t Vtx) { if (Vtx == G.Adj.size() - 1) { G.Adj.pop_back(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { G.Nodes.pop_back(); } return true; @@ -250,9 +252,7 @@ struct GraphTraits> { } /// Gets the weight associated with the given edge - static llvm::NoneType weight(edge_t /*unused*/) noexcept { - return llvm::None; - } + static EmptyType weight(edge_t /*unused*/) noexcept { return {}; 
} /// Removes the edge denoted by It outgoing from source-vertex Vtx from the /// graph G. This function is not required by the is_graph_trait concept. @@ -261,8 +261,8 @@ struct GraphTraits> { /// continue iteration instead of std::next(It) static edge_iterator removeEdge(graph_type &G, vertex_t Vtx, edge_iterator It) noexcept { - assert(Vtx < G.Adj.size()); - if constexpr (!std::is_same_v) { + assert(G.Adj.inbounds(Vtx)); + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } assert(G.Adj[Vtx].begin() <= It && It < G.Adj[Vtx].end()); @@ -279,7 +279,7 @@ struct GraphTraits> { /// \returns A roots_iterator directly following It that should be used to /// continue iteration instead of std::next(It) static roots_iterator removeRoot(graph_type &G, roots_iterator It) noexcept { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } assert(G.Roots.begin() <= It && It < G.Roots.end()); @@ -291,9 +291,9 @@ struct GraphTraits> { } #if __cplusplus >= 202002L - static_assert(is_graph>); + static_assert(is_graph); #endif - static_assert(is_reservable_graph_trait_v>>); + static_assert(is_reservable_graph_trait_v); }; } // namespace psr diff --git a/include/phasar/Utils/GraphTraits.h b/include/phasar/Utils/GraphTraits.h index de96b9cc20..bdfd82f212 100644 --- a/include/phasar/Utils/GraphTraits.h +++ b/include/phasar/Utils/GraphTraits.h @@ -42,49 +42,82 @@ concept is_graph_edge = requires(const Edge e1, Edge e2) { }; template -concept is_graph_trait = requires(typename GraphTrait::graph_type &graph, - const typename GraphTrait::graph_type &cgraph, - typename GraphTrait::value_type val, - typename GraphTrait::vertex_t vtx, - typename GraphTrait::edge_t edge) { - typename GraphTrait::graph_type; - typename GraphTrait::value_type; - typename GraphTrait::vertex_t; - typename GraphTrait::edge_t; - requires is_graph_edge; - { GraphTrait::Invalid } -> std::convertible_to; - { - GraphTrait::addNode(graph, 
val) - } -> std::convertible_to; - {GraphTrait::addEdge(graph, vtx, edge)}; - { - GraphTrait::outEdges(cgraph, vtx) - } -> psr::is_iterable_over_v; - { GraphTrait::outDegree(cgraph, vtx) } -> std::convertible_to; - {GraphTrait::dedupOutEdges(graph, vtx)}; - { - GraphTrait::nodes(cgraph) - } -> psr::is_iterable_over_v; - { - GraphTrait::vertices(cgraph) - } -> psr::is_iterable_over_v; - { - GraphTrait::node(cgraph, vtx) - } -> std::convertible_to; - { GraphTrait::size(cgraph) } -> std::convertible_to; - {GraphTrait::addRoot(graph, vtx)}; - { - GraphTrait::roots(cgraph) - } -> psr::is_iterable_over_v; - { GraphTrait::pop(graph, vtx) } -> std::same_as; - { GraphTrait::roots_size(cgraph) } -> std::convertible_to; - { - GraphTrait::target(edge) - } -> std::convertible_to; - { - GraphTrait::withEdgeTarget(edge, vtx) - } -> std::convertible_to; - {GraphTrait::weight(edge)}; +concept is_const_graph_trait = + requires(const typename GraphTrait::graph_type &cgraph, + typename GraphTrait::value_type val, + typename GraphTrait::vertex_t vtx, + typename GraphTrait::edge_t edge) { + typename GraphTrait::graph_type; + typename GraphTrait::value_type; + typename GraphTrait::vertex_t; + typename GraphTrait::edge_t; + requires is_graph_edge; + + { + GraphTrait::Invalid + } -> std::convertible_to; + + { + GraphTrait::outEdges(cgraph, vtx) + } -> psr::is_iterable_over_v; + { GraphTrait::outDegree(cgraph, vtx) } -> std::convertible_to; + { + GraphTrait::nodes(cgraph) + } -> psr::is_iterable_over_v; + { + GraphTrait::roots(cgraph) + } -> psr::is_iterable_over_v; + { + GraphTrait::vertices(cgraph) + } -> psr::is_iterable_over_v; + { + GraphTrait::node(cgraph, vtx) + } -> std::convertible_to; + { GraphTrait::size(cgraph) } -> std::convertible_to; + { GraphTrait::roots_size(cgraph) } -> std::convertible_to; + { + GraphTrait::target(edge) + } -> std::convertible_to; + { + GraphTrait::withEdgeTarget(edge, vtx) + } -> std::convertible_to; + }; + +template +concept is_graph_trait = + 
is_const_graph_trait && + requires(typename GraphTrait::graph_type &graph, + typename GraphTrait::value_type val, + typename GraphTrait::vertex_t vtx, + typename GraphTrait::edge_t edge) { + typename GraphTrait::graph_type; + typename GraphTrait::value_type; + typename GraphTrait::vertex_t; + typename GraphTrait::edge_t; + requires is_graph_edge; + { + GraphTrait::Invalid + } -> std::convertible_to; + { + GraphTrait::addNode(graph, val) + } -> std::convertible_to; + { GraphTrait::addEdge(graph, vtx, edge) }; + { GraphTrait::dedupOutEdges(graph, vtx) }; + { GraphTrait::addRoot(graph, vtx) }; + { GraphTrait::pop(graph, vtx) } -> std::same_as; + { + GraphTrait::target(edge) + } -> std::convertible_to; + { + GraphTrait::withEdgeTarget(edge, vtx) + } -> std::convertible_to; + { GraphTrait::weight(edge) }; + }; + +template +concept is_const_graph = requires(Graph g) { + typename GraphTraits>; + requires is_const_graph_trait>>; }; template @@ -94,22 +127,23 @@ concept is_graph = requires(Graph g) { }; template -concept is_reservable_graph_trait_v = is_graph_trait && - requires(typename GraphTrait::graph_type &g) { - {GraphTrait::reserve(g, size_t(0))}; -}; +concept is_reservable_graph_trait_v = + is_graph_trait && requires(typename GraphTrait::graph_type &g) { + { GraphTrait::reserve(g, size_t(0)) }; + }; template -concept is_removable_graph_trait_v = is_graph_trait && +concept is_removable_graph_trait_v = + is_graph_trait && requires(typename GraphTrait::graph_type &g, typename GraphTrait::vertex_t vtx, typename GraphTrait::edge_iterator edge_it, typename GraphTrait::roots_iterator root_it) { - typename GraphTrait::edge_iterator; - typename GraphTrait::roots_iterator; - {GraphTrait::removeEdge(g, vtx, edge_it)}; - {GraphTrait::removeRoot(g, root_it)}; -}; + typename GraphTrait::edge_iterator; + typename GraphTrait::roots_iterator; + { GraphTrait::removeEdge(g, vtx, edge_it) }; + { GraphTrait::removeRoot(g, root_it) }; + }; #else namespace detail { @@ -155,7 +189,7 @@ 
static constexpr bool is_removable_graph_trait_v = template std::decay_t reverseGraph(GraphTy &&G) #if __cplusplus >= 202002L - requires is_graph + requires is_graph #endif { std::decay_t Ret; @@ -193,7 +227,7 @@ template void printGraph(const GraphTy &G, llvm::raw_ostream &OS, llvm::StringRef Name = "", NodeTransform NodeToString = {}) #if __cplusplus >= 202002L - requires is_graph + requires is_graph #endif { using traits_t = GraphTraits; diff --git a/include/phasar/Utils/IotaIterator.h b/include/phasar/Utils/IotaIterator.h index 9b55162717..facd03efcc 100644 --- a/include/phasar/Utils/IotaIterator.h +++ b/include/phasar/Utils/IotaIterator.h @@ -32,7 +32,11 @@ template class IotaIterator { constexpr pointer operator->() const noexcept { return &Elem; } constexpr IotaIterator &operator++() noexcept { - ++Elem; + if constexpr (is_incrementable) { + ++Elem; + } else { + Elem = T(size_t(Elem) + 1); + } return *this; } constexpr IotaIterator operator++(int) noexcept { @@ -56,16 +60,27 @@ template class IotaIterator { T Elem{}; }; -template -using IotaRangeType = llvm::iterator_range>; -template -constexpr auto iota(T From, type_identity_t To) noexcept { - static_assert(std::is_integral_v, "Iota only works on integers"); - using iterator_type = IotaIterator>; +template +using IotaRangeType = llvm::iterator_range>; + +template +[[nodiscard]] constexpr auto iota(IdT From, type_identity_t To) noexcept { + static_assert(is_explicitly_convertible_to && + is_explicitly_convertible_to, + "Iota only works on integers and integer-like types"); + using iterator_type = IotaIterator>; auto Ret = llvm::make_range(iterator_type(From), iterator_type(To)); return Ret; } +template [[nodiscard]] constexpr auto iota(size_t To) noexcept { + static_assert(is_explicitly_convertible_to && + is_explicitly_convertible_to, + "Iota only works on integers and integer-like types"); + using iterator_type = IotaIterator>; + return llvm::make_range(iterator_type(), iterator_type(IdT(To))); +} + 
static_assert(is_iterable_over_v, int>); } // namespace psr diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index d5b3574642..26d773b1e6 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -10,61 +10,168 @@ #ifndef PHASAR_UTILS_SCCGENERIC_H #define PHASAR_UTILS_SCCGENERIC_H -#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" #include "phasar/Utils/BitSet.h" +#include "phasar/Utils/EmptyBaseOptimizationUtils.h" +#include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/RepeatIterator.h" #include "phasar/Utils/TypedVector.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/HashBuilder.h" -#include "llvm/Support/raw_ostream.h" #include -#include -#include namespace psr { class LLVMBasedICFG; } // namespace psr -namespace psr::analysis::call_graph { +namespace psr { + +namespace detail { +struct SCCIdBase { + uint32_t Value{}; + + constexpr SCCIdBase() noexcept = default; + + explicit constexpr SCCIdBase(uint32_t Val) noexcept : Value(Val) {} + + explicit constexpr operator uint32_t() const noexcept { return Value; } + template >> + explicit constexpr operator size_t() const noexcept { + return Value; + } -enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; + constexpr uint32_t operator+() const noexcept { return Value; } + + friend constexpr bool operator==(SCCIdBase L, SCCIdBase R) noexcept { + return L.Value == R.Value; + } + friend constexpr bool operator!=(SCCIdBase L, SCCIdBase R) noexcept { + return !(L == R); + } +}; +} // namespace detail + +template struct SCCId : detail::SCCIdBase { + using detail::SCCIdBase::SCCIdBase; +}; + +} // namespace psr + +namespace llvm { 
+template struct DenseMapInfo> { + using SCCId = psr::SCCId; + + static constexpr SCCId getEmptyKey() noexcept { return SCCId(UINT32_MAX); } + static constexpr SCCId getTombstoneKey() noexcept { + return SCCId(UINT32_MAX - 1); + } + + static auto getHashValue(SCCId SCC) noexcept { + return llvm::hash_value(uint32_t(SCC)); + } + static constexpr bool isEqual(SCCId SCC1, SCCId SCC2) noexcept { + return SCC1 == SCC2; + } +}; +} // namespace llvm + +namespace psr { // holds the scc's of a given graph template struct SCCHolder { - llvm::SmallVector SCCOfNode; - llvm::SmallVector> NodesInSCC{}; - size_t NumSCCs = 0; + TypedVector, 0> SCCOfNode; + TypedVector, llvm::SmallVector> + NodesInSCC{}; + + [[nodiscard]] size_t size() const noexcept { return NodesInSCC.size(); } + [[nodiscard]] bool empty() const noexcept { return NodesInSCC.empty(); } }; -// holds a graph were the scc's are compressed to a single node. Resulting graph +// holds a graph were the scc's are collapsed to a single node. Resulting graph // is a DAG -template struct SCCCallers { - llvm::SmallVector, 0> ChildrenOfSCC; - llvm::SmallVector SCCRoots; +template struct SCCDependencyGraph { + TypedVector, llvm::SmallDenseSet>> + ChildrenOfSCC; + llvm::SmallVector, 0> SCCRoots; +}; + +template +struct GraphTraits> { + using graph_type = SCCDependencyGraph; + using value_type = EmptyType; + using vertex_t = SCCId; + using edge_t = vertex_t; + + static inline constexpr auto Invalid = vertex_t(UINT32_MAX); + + [[nodiscard]] static const auto &outEdges(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.ChildrenOfSCC.inbounds(Vtx)); + return G.ChildrenOfSCC[Vtx]; + } + + [[nodiscard]] static size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.ChildrenOfSCC.inbounds(Vtx)); + return G.ChildrenOfSCC[Vtx].size(); + } + + [[nodiscard]] static RepeatRangeType + nodes(const graph_type &G) noexcept { + return repeat(EmptyType{}, G.ChildrenOfSCC.size()); + } + + [[nodiscard]] static 
llvm::ArrayRef + roots(const graph_type &G) noexcept { + return G.SCCRoots; + } + + [[nodiscard]] static auto vertices(const graph_type &G) noexcept { + return iota(G.Adj.size()); + } + + [[nodiscard]] static value_type node([[maybe_unused]] const graph_type &G, + [[maybe_unused]] vertex_t Vtx) noexcept { + assert(G.ChildrenOfSCC.inbounds(Vtx)); + return {}; + } + + [[nodiscard]] static size_t size(const graph_type &G) noexcept { + return G.ChildrenOfSCC.size(); + } + + [[nodiscard]] static size_t + roots_size(const graph_type &G) noexcept { // NOLINT + return G.SCCRoots.size(); + } - void print(llvm::raw_ostream &OS, - const SCCHolder &SCCs, const G &Graph); + [[nodiscard]] constexpr vertex_t target(edge_t Edge) noexcept { return Edge; } + + [[nodiscard]] vertex_t withEdgeTarget(edge_t /*edge*/, + vertex_t Tar) noexcept { + return Tar; + } }; -// holds topologically sorted scccallers -struct SCCOrder { - llvm::SmallVector SCCIds; +// holds topologically sorted SCCDependencyGraph +template struct SCCOrder { + llvm::SmallVector, 0> SCCIds; }; +namespace detail { + template struct SCCData { - llvm::SmallVector Disc; - llvm::SmallVector Low; - llvm::SmallBitVector OnStack; + TypedVector Disc; + TypedVector Low; + BitSet OnStack; llvm::SmallVector Stack; uint32_t Time = 0; - llvm::SmallBitVector Seen; + BitSet Seen; explicit SCCData(size_t NumFuns) : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), @@ -72,8 +179,8 @@ template struct SCCData { }; template struct SCCDataIt { - TypedVector Disc; - TypedVector Low; + TypedVector Disc; + TypedVector Low; BitSet OnStack; llvm::SmallVector Stack; llvm::SmallVector> CallStack; @@ -91,238 +198,249 @@ constexpr void setMin(uint32_t &InOut, uint32_t Other) { } } -// TODO: Non-recursive version template -static void computeSCCsRec(const G &Graph, typename G::GraphNodeId CurrNode, - SCCData &Data, - SCCHolder &Holder) { +static void +computeSCCsRec(const G &Graph, typename GraphTraits::vertex_t CurrNode, + 
SCCData::vertex_t> &Data, + SCCHolder::vertex_t> &Holder) { // See // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components auto CurrTime = Data.Time++; - Data.Disc[size_t(CurrNode)] = CurrTime; - Data.Low[size_t(CurrNode)] = CurrTime; + Data.Disc[CurrNode] = CurrTime; + Data.Low[CurrNode] = CurrTime; Data.Stack.push_back(CurrNode); - Data.OnStack.set(uint32_t(CurrNode)); + Data.OnStack.insert(CurrNode); + + using GTraits = psr::GraphTraits; + using detail::setMin; + using SCCId = psr::SCCId::vertex_t>; - for (auto SuccNode : Graph.Adj[size_t(CurrNode)]) { - if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) { + for (const auto &OutEdge : GTraits::outEdges(Graph, CurrNode)) { + auto SuccNode = GTraits::target(OutEdge); + if (Data.Disc[SuccNode] == UINT32_MAX) { // Tree-edge: Not seen yet --> recurse computeSCCsRec(Graph, SuccNode, Data, Holder); - setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]); - } else if (Data.OnStack.test(uint32_t(SuccNode))) { + setMin(Data.Low[CurrNode], Data.Low[SuccNode]); + } else if (Data.OnStack.contains(SuccNode)) { // Back-edge --> circle! 
- setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]); + setMin(Data.Low[CurrNode], Data.Disc[SuccNode]); } } - if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) { + if (Data.Low[CurrNode] == Data.Disc[CurrNode]) { // Found SCC - auto SCCIdx = SCCId(Holder.NumSCCs++); + auto SCCIdx = SCCId(Holder.NodesInSCC.size()); auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); assert(!Data.Stack.empty()); while (Data.Stack.back() != CurrNode) { auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); + Holder.SCCOfNode[Fun] = SCCIdx; + Data.OnStack.erase(Fun); + Data.Seen.insert(Fun); NodesInSCC.push_back(Fun); } auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); + Holder.SCCOfNode[Fun] = SCCIdx; + Data.OnStack.erase(Fun); + Data.Seen.insert(Fun); NodesInSCC.push_back(Fun); } } -// Iterative Implementation for Tarjan's SCC Alg. 
-template -static void tarjanSCCIt(const G &Graph, - SCCDataIt &Data, - SCCHolder &Holder) { - using GraphNodeId = typename G::GraphNodeId; - uint32_t Unvisited = UINT32_MAX; - auto CurrTime = 0; - for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { - // std::cout << std::to_string(Vertex) << "\n"; - if (Data.Disc[GraphNodeId(Vertex)] == Unvisited) { - Data.CallStack.push_back({GraphNodeId(Vertex), 0}); - while (!Data.CallStack.empty()) { - auto Curr = Data.CallStack.pop_back_val(); - // Curr.second = 0 implies that node Curr.fist was not visited before - if (Curr.second == 0) { - Data.Disc[Curr.first] = CurrTime; - Data.Low[Curr.first] = CurrTime; - CurrTime++; - Data.Stack.push_back(Curr.first); - Data.OnStack.insert(Curr.first); - } - // Curr.second > 0 implies that we came back from a recursive call of - // node with higher depth - if (Curr.second > 0) { - GraphNodeId Pred = Graph.getEdges(Curr.first)[Curr.second - 1]; - setMin(Data.Low[Curr.first], Data.Low[Pred]); - } - // find the next node for recursion - while (Curr.second < Graph.getEdges(Curr.first).size() && - Data.Disc[Graph.getEdges(Curr.first)[Curr.second]] != - Unvisited) { - GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; - if (Data.OnStack.test(uint32_t(W))) { - setMin(Data.Low[Curr.first], Data.Disc[W]); - } - Curr.second++; - } - // If a Node u is undiscovered i.e. Data.Disc[u] = UINT32_MAX - // start a recursive function call - if (Curr.second < Graph.getEdges(Curr.first).size()) { - GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; - Data.CallStack.push_back({Curr.first, Curr.second + 1}); - Data.CallStack.push_back({U, 0}); - continue; - } - // If Curr.first is the root of a connected component i.e. Data.Disc = - // Data.Low i.e. 
cycle found - if (Data.Low[Curr.first] == Data.Disc[Curr.first]) { - //-> SCC found - auto SCCIdx = SCCId(Holder.NumSCCs++); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != Curr.first) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.erase(Fun); - Data.Seen.insert(Fun); - NodesInSCC.push_back(Fun); - } - - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.erase(Fun); - Data.Seen.insert(Fun); - NodesInSCC.push_back(Fun); - } - } - } - } -} +} // namespace detail template -[[nodiscard]] SCCHolder computeSCCs(const G &Graph) { - SCCHolder Ret{}; +[[nodiscard]] SCCHolder::vertex_t> +computeSCCs(const G &Graph) { + using GTraits = psr::GraphTraits; + + SCCHolder Ret{}; - auto NumNodes = Graph.Adj.size(); + auto NumNodes = GTraits::size(Graph); Ret.SCCOfNode.resize(NumNodes); if (!NumNodes) { return Ret; } - SCCData Data(NumNodes); - for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (!Data.Seen.test(FunId)) { - computeSCCsRec(Graph, G::GraphNodeId(FunId), Data, Ret); + detail::SCCData Data(NumNodes); + for (auto VtxId : GTraits::vertices(Graph)) { + if (!Data.Seen.contains(VtxId)) { + computeSCCsRec(Graph, VtxId, Data, Ret); } } return Ret; } -// choose which Tarjan implementation will be executed +// Note: generated by FhGenie GPT o3 Mini template -[[nodiscard]] SCCHolder -execTarjan(const G &Graph, const bool Iterative) { - using GraphNodeId = typename G::GraphNodeId; - SCCHolder Ret{}; - - auto NumNodes = Graph.Adj.size(); - Ret.SCCOfNode.resize(NumNodes); +SCCHolder>::vertex_t> +computeSCCIterative(const G &Graph) { + using GTraits = GraphTraits>; + using VertexTy = typename GTraits::vertex_t; + using EdgeTy = typename GTraits::edge_t; + using SCCId = psr::SCCId; + const int UNVISITED = -1; + + // Number of nodes (vertices are assumed to be consecutive indices). 
+ size_t NumNodes = GTraits::size(Graph); + + // Use TypedVector for per-vertex data instead of unordered_map. + TypedVector Dfn; // discovery index. + Dfn.resize(NumNodes, UNVISITED); + TypedVector Lowlink; // smallest index reachable. + Lowlink.resize(NumNodes, 0); + TypedVector InStack; // marker for Tarjan's stack. + InStack.resize(NumNodes, false); + + int CurrentIndex = 0; + + // Our final SCC holder. Pre-resize SCCOfNode to the number of nodes. + SCCHolder Holder; + Holder.SCCOfNode.resize(NumNodes); + // Initially, holder.NodesInSCC is empty and holder.NumSCCs is zero. + + // Instead of storing a vector of out-edges, we store an iterator pair. + using OutEdgeRange = + decltype(GTraits::outEdges(Graph, std::declval())); + using OutEdgeIterator = decltype(std::begin(std::declval())); + + // DFS frame holding current vertex and its edge iterator range. + struct DFSFrame { + VertexTy V; + OutEdgeIterator It; + OutEdgeIterator ItEnd; + }; + std::vector DfsStack; + std::vector S; // Tarjan's stack (vertices in the current DFS path). + + // Helper to push a new DFS frame. + const auto PushFrame = [&](const VertexTy &V) { + auto &&Range = GTraits::outEdges(Graph, V); + DFSFrame Frame{ + V, + std::begin(Range), + std::end(Range), + }; + DfsStack.push_back(Frame); + }; - if (!NumNodes) { - return Ret; - } + // Iterate over all vertices (assumed to be dense). + for (const auto &V : GTraits::vertices(Graph)) { + if (Dfn[V] != UNVISITED) { + continue; // already visited + } - SCCData Data(NumNodes); - SCCDataIt DataIt(NumNodes); - for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (Iterative) { - if (!DataIt.Seen.test(FunId)) { - tarjanSCCIt(Graph, DataIt, Ret); - } - } else { - if (!Data.Seen.test(FunId)) { - computeSCCsRec(Graph, GraphNodeId(FunId), Data, Ret); + PushFrame(V); + Dfn[V] = CurrentIndex; + Lowlink[V] = CurrentIndex; + CurrentIndex++; + S.push_back(V); + InStack[V] = true; + + // DFS simulation using the explicit stack. 
+ while (!DfsStack.empty()) { + DFSFrame &Frame = DfsStack.back(); + VertexTy U = Frame.V; + if (Frame.It != Frame.ItEnd) { + // Process the next outgoing edge. + const EdgeTy &Edge = *(Frame.It++); + VertexTy W = GTraits::target(Edge); + if (Dfn[W] == UNVISITED) { + // w is newly discovered. + PushFrame(W); + Dfn[W] = CurrentIndex; + Lowlink[W] = CurrentIndex; + CurrentIndex++; + S.push_back(W); + InStack[W] = true; + } else if (InStack[W]) { + // w is in the current DFS path; update lowlink. + Lowlink[U] = std::min(Lowlink[U], Dfn[W]); + } + } else { + // Done exploring u. + if (Lowlink[U] == Dfn[U]) { + // u is the root of an SCC; pop from S until u is reached. + auto &Comp = Holder.NodesInSCC.emplace_back(); // The new SCC. + VertexTy W; + do { + W = S.back(); + S.pop_back(); + InStack[W] = false; + // Assign w the current SCC id. + Holder.SCCOfNode[W] = static_cast(Holder.size()); + Comp.push_back(W); + } while (W != U); + } + DfsStack.pop_back(); + if (!DfsStack.empty()) { + // After returning, update the parent's lowlink. 
+ VertexTy Parent = DfsStack.back().V; + Lowlink[Parent] = std::min(Lowlink[Parent], Lowlink[U]); + } } } } - return Ret; + return Holder; } template -[[nodiscard]] SCCCallers -computeSCCCallers(const G &Graph, - const SCCHolder &SCCs); +SCCDependencyGraph::vertex_t> computeSCCDependencies( + const G &Graph, const SCCHolder::vertex_t> &SCCs) { -template -auto computeSCCCallers(const G &Graph, - const SCCHolder &SCCs) - -> SCCCallers { - SCCCallers Ret; - Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); + using GTraits = GraphTraits; + using GraphNodeId = typename GraphTraits::vertex_t; - llvm::SmallBitVector Roots(SCCs.NumSCCs, true); + SCCDependencyGraph Ret; + Ret.ChildrenOfSCC.resize(SCCs.size()); - size_t NodeId = 0; - for (const auto &SuccNodes : Graph.Adj) { + BitSet> Roots(SCCs.size(), true); + + for (auto NodeId : GTraits::vertices(Graph)) { auto SrcSCC = SCCs.SCCOfNode[NodeId]; - for (auto SuccNode : SuccNodes) { - auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)]; - if (DestSCC != SrcSCC) { - Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC); - Roots.reset(uint32_t(DestSCC)); + for (const auto &Edge : GTraits::outEdges(Graph, NodeId)) { + auto Succ = GTraits::target(Edge); + auto SuccSCC = SCCs.SCCOfNode[Succ]; + if (SuccSCC != SrcSCC) { + Ret.ChildrenOfSCC[SrcSCC].insert(SuccSCC); + Roots.erase(SuccSCC); } } - - ++NodeId; } Ret.SCCRoots.reserve(Roots.count()); - for (auto Rt : Roots.set_bits()) { - Ret.SCCRoots.push_back(SCCId(Rt)); + for (auto Rt : Roots) { + Ret.SCCRoots.push_back(Rt); } return Ret; } -template -[[nodiscard]] SCCOrder -computeSCCOrder(const SCCHolder &SCCs, - const SCCCallers &Callers); -template -inline auto computeSCCOrder(const SCCHolder &SCCs, - const SCCCallers &Callers) - -> SCCOrder { - SCCOrder Ret; - Ret.SCCIds.reserve(SCCs.NumSCCs); - - llvm::SmallBitVector Seen; - Seen.resize(SCCs.NumSCCs); - - auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { - Seen.set(uint32_t(CurrSCC)); - for (auto Caller : Callers.ChildrenOfSCC[size_t(CurrSCC)]) 
{ - if (!Seen.test(uint32_t(Caller))) { +template +[[nodiscard]] SCCOrder +computeSCCOrder(const SCCHolder &SCCs, + const SCCDependencyGraph &Callers) { + SCCOrder Ret; + Ret.SCCIds.reserve(SCCs.size()); + + BitSet> Seen; + Seen.reserve(SCCs.size()); + + auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { + Seen.insert(CurrSCC); + for (auto Caller : Callers.ChildrenOfSCC[CurrSCC]) { + if (!Seen.contains(Caller)) { Dfs(Dfs, Caller); } } @@ -330,7 +448,7 @@ inline auto computeSCCOrder(const SCCHolder &SCCs, }; for (auto Leaf : Callers.SCCRoots) { - if (!Seen.test(uint32_t(Leaf))) { + if (!Seen.contains(Leaf)) { Dfs(Dfs, Leaf); } } @@ -339,19 +457,6 @@ inline auto computeSCCOrder(const SCCHolder &SCCs, return Ret; } -} // namespace psr::analysis::call_graph - -namespace llvm { -template <> struct DenseMapInfo { - using SCCId = psr::analysis::call_graph::SCCId; - - static inline SCCId getEmptyKey() noexcept { return SCCId(-1); } - static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); } - static inline auto getHashValue(SCCId Id) noexcept { - return llvm::hash_value(uint32_t(Id)); - } - static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } -}; -} // namespace llvm +} // namespace psr #endif diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 1e2f2524cc..d823fc52a2 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -203,6 +203,19 @@ struct has_llvm_dense_map_info< decltype(llvm::DenseMapInfo::isEqual(std::declval(), std::declval()))>> : std::true_type {}; + +template +struct is_incrementable : std::false_type {}; +template +struct is_incrementable())>> + : std::true_type {}; + +template +struct is_explicitly_convertible_to : std::false_type {}; +template +struct is_explicitly_convertible_to< + From, To, std::void_t(std::declval()))>> + : std::true_type {}; } // namespace detail template @@ -281,6 +294,13 @@ constexpr bool has_llvm_dense_map_info = 
detail::has_llvm_dense_map_info::value; template using type_identity_t = typename type_identity::type; +template +PSR_CONCEPT is_incrementable = detail::is_incrementable::value; + +template +PSR_CONCEPT is_explicitly_convertible_to = + detail::is_explicitly_convertible_to::value; + template constexpr size_t variant_idx = detail::variant_idx::value; diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h index 7ab8dacb00..46a56c9b29 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -13,6 +13,7 @@ #include "phasar/Utils/ByRef.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include @@ -28,7 +29,7 @@ class TypedVector { TypedVector() noexcept = default; TypedVector(std::initializer_list IList) : Vec(IList) {} TypedVector(size_t Size) : Vec(Size) {} - TypedVector(size_t Size, ValueT Default) : Vec(Size, Default){}; + TypedVector(size_t Size, ValueT Default) : Vec(Size, Default) {}; template explicit TypedVector(Iter From, Iter To) @@ -90,16 +91,29 @@ class TypedVector { return !(*this == Other); } - [[nodiscard]] llvm::ArrayRef asRef() const &noexcept { return Vec; } - [[nodiscard]] llvm::ArrayRef asRef() &&noexcept = delete; + [[nodiscard]] llvm::ArrayRef asRef() const & noexcept { return Vec; } + [[nodiscard]] llvm::ArrayRef asRef() && noexcept = delete; [[nodiscard]] llvm::ArrayRef // NOLINTNEXTLINE(readability-identifier-naming) - drop_front(size_t Offs) const &noexcept { + drop_front(size_t Offs) const & noexcept { return asRef().drop_front(Offs); } [[nodiscard]] llvm::ArrayRef - drop_front(size_t Offs) &&noexcept = delete; + drop_front(size_t Offs) && noexcept = delete; + + [[nodiscard]] auto enumerate() const noexcept { + return llvm::map_range(llvm::enumerate(Vec), [](const auto &IndexAndVal) { + return std::pair>{IdT(IndexAndVal.index()), + IndexAndVal.value()}; + }); + } + [[nodiscard]] auto enumerate() noexcept { + return 
llvm::map_range(llvm::enumerate(Vec), [](auto &IndexAndVal) { + return std::pair{IdT(IndexAndVal.index()), + IndexAndVal.value()}; + }); + } private: llvm::SmallVector Vec{}; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp similarity index 85% rename from lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp rename to lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index a25b557ae5..faeffa233a 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -7,11 +7,10 @@ * Fabian Schiebel and other *****************************************************************************/ -#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" -#include "phasar/PhasarLLVM/Utils/FilteredAliasSet.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" @@ -24,7 +23,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -45,11 +43,7 @@ #include using namespace psr; -using namespace psr::analysis::call_graph; - -using TAGAliasHandler = llvm::function_ref; -using TAGAliasInfo = llvm::function_ref; +using namespace psr::vta; static void printNodeImpl(llvm::raw_ostream &OS, Variable Var) { OS << "var-"; @@ -67,14 +61,12 @@ static void printNodeImpl(llvm::raw_ostream &OS, Return Ret) { OS.write_escaped(Ret.Fun->getName()); } -void analysis::call_graph::printNode(llvm::raw_ostream &OS, TAGNode TN) { +void vta::printNode(llvm::raw_ostream &OS, TAGNode TN) { 
std::visit([&OS](auto Nod) { printNodeImpl(OS, Nod); }, TN.Label); } static llvm::SmallBitVector getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { - /// NOTE: Copied from SiLLiS - llvm::SmallBitVector Ret; auto PointerSize = DL.getPointerSize(); @@ -278,7 +270,7 @@ static void handleGEP(const llvm::GetElementPtrInst *GEP, } static bool handleEntryForStore(const llvm::StoreInst *Store, - TypeAssignmentGraph &TAG, TAGAliasInfo AI, + TypeAssignmentGraph &TAG, AliasInfoTy AI, const llvm::DataLayout &DL) { const auto *Base = Store->getValueOperand()->stripPointerCastsAndAliases(); bool IsEntry = isVTableOrFun(Base); @@ -317,7 +309,7 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, } static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, - TAGAliasInfo AI, const llvm::DataLayout &DL) { + AliasInfoTy AI, const llvm::DataLayout &DL) { if (handleEntryForStore(Store, TAG, AI, DL)) { return; @@ -390,30 +382,6 @@ static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { } } -static llvm::StringRef extractTypeName(llvm::StringRef CtorName) { - // Example: _ZN3OneC2Ev - - auto EndIdx = CtorName.rfind("C2E"); - if (EndIdx == llvm::StringRef::npos) { - EndIdx = CtorName.rfind("C1E"); - } - - if (EndIdx == llvm::StringRef::npos) { - EndIdx = CtorName.size(); - } - - auto StartIdx = EndIdx; - while (StartIdx) { - --StartIdx; - - if (llvm::isDigit(CtorName[StartIdx])) { - break; - } - } - return CtorName.slice(StartIdx, EndIdx); -} -static llvm::StringRef extractTypeName(std::string &&) = delete; - static const llvm::Value *getTypeFromDI(const llvm::DICompositeType *CompTy, const llvm::Module &Mod, const psr::LLVMVFTableProvider &VTP) { @@ -437,6 +405,8 @@ static const llvm::Value *getTypeFromDI(const llvm::DICompositeType *CompTy, return nullptr; } + // TODO: With latest changes from f-TestingAPIChanges, we don't need the below + // loop! 
auto ClearName = CompTy->getName().str(); const auto *Scope = CompTy->getScope(); while (llvm::isa_and_nonnull Seen; - // llvm::SmallVector WL = {Call}; - - // // Search for the ctor call - - // const auto *CallerFun = Call->getFunction(); - - // while (!WL.empty()) { - // const auto *CurrObj = WL.pop_back_val(); - // for (const auto &Use : CurrObj->uses()) { - // const auto *User = llvm::dyn_cast(Use.getUser()); - // if (!User || User->getFunction() != CallerFun) - // continue; - - // if (const auto *Cast = llvm::dyn_cast(User); - // Cast && Cast->getDestTy()->isPointerTy()) { - // if (Seen.insert(Cast).second) - // WL.push_back(Cast); - - // continue; - // } - - // if (const auto *CtorCall = llvm::dyn_cast(User); - // CtorCall && CtorCall->getCalledFunction() && - // Use == CtorCall->getArgOperand(0)) { - // auto CtorName = CtorCall->getCalledFunction()->getName(); - // if (psr::isConstructor(CtorName)) { - // auto DemangledCtorName = llvm::demangle(CtorName.str()); - - // auto TypeName = extractTypeName(CtorName); - - // // TODO - // } - // // TODO: Extract type from ctor fun - // } - // } - // } } static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, @@ -599,7 +530,7 @@ static void handleReturn(const llvm::ReturnInst *Ret, static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, const psr::CallGraph &BaseCG, - TAGAliasInfo AI, const llvm::DataLayout &DL, + AliasInfoTy AI, const llvm::DataLayout &DL, const psr::LLVMVFTableProvider &VTP) { if (const auto *Alloca = llvm::dyn_cast(&I)) { handleAlloca(Alloca, TAG, VTP); @@ -644,7 +575,7 @@ static void buildTAGWithFun( const llvm::Function *Fun, TypeAssignmentGraph &TAG, const psr::CallGraph &BaseCG, - TAGAliasInfo AI, const llvm::DataLayout &DL, + AliasInfoTy AI, const llvm::DataLayout &DL, const psr::LLVMVFTableProvider &VTP) { for (const auto &I : llvm::instructions(Fun)) { dispatch(I, TAG, BaseCG, AI, DL, VTP); @@ -655,7 +586,7 @@ static auto 
computeTypeAssignmentGraphImpl( const llvm::Module &Mod, const psr::CallGraph &BaseCG, - TAGAliasInfo AI, const psr::LLVMVFTableProvider &VTP) + AliasInfoTy AI, const psr::LLVMVFTableProvider &VTP) -> TypeAssignmentGraph { TypeAssignmentGraph TAG; @@ -677,19 +608,14 @@ static auto computeTypeAssignmentGraphImpl( return TAG; } -auto analysis::call_graph::computeTypeAssignmentGraph( +auto vta::computeTypeAssignmentGraph( const llvm::Module &Mod, const psr::CallGraph &BaseCG, - psr::LLVMAliasInfoRef AS, const psr::LLVMVFTableProvider &VTP) + AliasInfoTy AS, const psr::LLVMVFTableProvider &VTP) -> TypeAssignmentGraph { - FilteredAliasSet FAS(AS); - return computeTypeAssignmentGraphImpl( - Mod, BaseCG, - [&FAS](const auto *Fact, const auto *At, TAGAliasHandler Handler) { - FAS.foreachAlias(Fact, At, Handler); - }, - VTP); + + return computeTypeAssignmentGraphImpl(Mod, BaseCG, AS, VTP); } void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp new file mode 100644 index 0000000000..3e6b1894e3 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp @@ -0,0 +1,87 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and other + *****************************************************************************/ + +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" + +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/SCCGeneric.h" + +using namespace psr; +using namespace psr::vta; + +static void initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { + for (const auto &[Node, Types] : TAG.TypeEntryPoints) { + auto SCC = SCCs.SCCOfNode[Node]; + TA.TypesPerSCC[SCC].insert(Types.begin(), Types.end()); + } +} + +static void propagate(TypeAssignment &TA, + const SCCDependencyGraph &Deps, + SCCId CurrSCC) { + const auto &Types = TA.TypesPerSCC[CurrSCC]; + if (Types.empty()) { + return; + } + + for (auto Succ : Deps.ChildrenOfSCC[CurrSCC]) { + TA.TypesPerSCC[Succ].insert(Types.begin(), Types.end()); + } +} + +TypeAssignment vta::propagateTypes(const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs, + const SCCDependencyGraph &Deps, + const SCCOrder &Order) { + TypeAssignment Ret; + Ret.TypesPerSCC.resize(SCCs.size()); + + initialize(Ret, TAG, SCCs); + for (auto SCC : Order.SCCIds) { + propagate(Ret, Deps, SCC); + } + + return Ret; +} + +void TypeAssignment::print(llvm::raw_ostream &OS, + const TypeAssignmentGraph &TAG, + const SCCHolder &SCCs) { + OS << "digraph TypeAssignment {\n"; + psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; + + Compressor Types; + auto GetOrAddType = [&](const llvm::Value *Ty) { + auto [Id, Inserted] = Types.insert(Ty); + if (Inserted) { + OS << (size_t(Id) + SCCs.size()) << "[label=\""; + OS.write_escaped(Ty->getName()); + OS << "\"];\n"; + } + return Id + SCCs.size(); + }; + + for (const auto &[Ctr, NodesInSCC] : SCCs.NodesInSCC.enumerate()) { + OS << " " << uint32_t(Ctr) << "[label=\""; + for (auto TNId : SCCs.NodesInSCC[Ctr]) { + auto TN = 
TAG.Nodes[TNId]; + printNode(OS, TN); + OS << "\\n"; + } + OS << "\"];\n"; + + for (const auto *Ty : TypesPerSCC[Ctr]) { + auto TyId = GetOrAddType(Ty); + OS << uint32_t(Ctr) << "->" << TyId << ";\n"; + } + } +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp deleted file mode 100644 index b9f8afdf81..0000000000 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/TypePropagator.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -#include "phasar/PhasarLLVM/ControlFlow/TypePropagator.h" - -#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h" -#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/Compressor.h" - -using namespace psr; -using namespace psr::analysis::call_graph; - -static void -initialize(TypeAssignment &TA, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { - for (const auto &[Node, Types] : TAG.TypeEntryPoints) { - auto SCC = SCCs.SCCOfNode[size_t(Node)]; - TA.TypesPerSCC[size_t(SCC)].insert(Types.begin(), Types.end()); - } -} - -static void propagate(TypeAssignment &TA, - const SCCCallers &Deps, - SCCId CurrSCC) { - const auto &Types = TA.TypesPerSCC[size_t(CurrSCC)]; - if (Types.empty()) { - return; - } - - for (auto Succ : Deps.ChildrenOfSCC[size_t(CurrSCC)]) { - TA.TypesPerSCC[size_t(Succ)].insert(Types.begin(), Types.end()); - } -} - -TypeAssignment analysis::call_graph::propagateTypes( - const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs, - const SCCCallers &Deps, const SCCOrder &Order) { - TypeAssignment Ret; - 
Ret.TypesPerSCC.resize(SCCs.NumSCCs); - - initialize(Ret, TAG, SCCs); - for (auto SCC : Order.SCCIds) { - propagate(Ret, Deps, SCC); - } - - return Ret; -} - -void TypeAssignment::print( - llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, - const SCCHolder &SCCs) { - OS << "digraph TypeAssignment {\n"; - psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - - Compressor Types; - auto GetOrAddType = [&](const llvm::Value *Ty) { - auto [Id, Inserted] = Types.insert(Ty); - if (Inserted) { - OS << (size_t(Id) + SCCs.NumSCCs) << "[label=\""; - OS.write_escaped(Ty->getName()); - OS << "\"];\n"; - } - return Id + SCCs.NumSCCs; - }; - - for (size_t Ctr = 0; Ctr != SCCs.NumSCCs; ++Ctr) { - OS << " " << Ctr << "[label=\""; - for (auto TNId : SCCs.NodesInSCC[Ctr]) { - auto TN = TAG.Nodes[TNId]; - printNode(OS, TN); - OS << "\\n"; - } - OS << "\"];\n"; - - for (const auto *Ty : TypesPerSCC[Ctr]) { - auto TyId = GetOrAddType(Ty); - OS << Ctr << "->" << TyId << ";\n"; - } - } -} diff --git a/lib/PhasarLLVM/Utils/FilteredAliasSet.cpp b/lib/PhasarLLVM/Utils/FilteredAliasSet.cpp deleted file mode 100644 index 84becb7a85..0000000000 --- a/lib/PhasarLLVM/Utils/FilteredAliasSet.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#include "phasar/PhasarLLVM/Utils/FilteredAliasSet.h" - -#include "llvm/IR/Argument.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/Casting.h" - -namespace psr { - -template -void foreachFilteredAliasSetImpl( - const llvm::Value *Fact, const llvm::Instruction *At, - llvm::function_ref WithAlias, - FilteredAliasSet::alias_info_ref_t PT) { - - const auto *Base = Fact->stripPointerCastsAndAliases(); - - static constexpr auto GetFunction = - [](const llvm::Value *V) -> const llvm::Function * { - if (const auto *Inst = llvm::dyn_cast(V)) { - return Inst->getFunction(); - } - if (const auto *Arg = llvm::dyn_cast(V)) { - return Arg->getParent(); - } - return nullptr; - }; - - // If 
mustNoalias is false, then p1 and p2 may alias. If mustNoalias is true, - // then p1 and p2 definitely are not aliases. - static constexpr auto MustNoalias = [](const llvm::Value *P1, - const llvm::Value *P2) { - if (P1 == P2) { - return false; - } - assert(P1); - assert(P2); - if (const auto *Alloca1 = llvm::dyn_cast(P1)) { - if (llvm::isa(P2)) { - return true; - } - if (const auto *Alloca2 = llvm::dyn_cast(P2)) { - return !Alloca1->getAllocatedType()->isPointerTy() && - !Alloca2->getAllocatedType()->isPointerTy(); - } - } else if (const auto *Glob1 = llvm::dyn_cast(P1)) { - if (llvm::isa(P2) || Glob1->isConstant()) { - return true; - } - if (const auto *Glob2 = llvm::dyn_cast(P2)) { - return true; // approximation - } - } else if (const auto *Glob2 = llvm::dyn_cast(P2)) { - return Glob2->isConstant(); - } - - return false; - }; - - const auto *FactFun = At ? At->getFunction() : GetFunction(Fact); - - PT(Fact, [&](const llvm::Value *Alias) { - if (FactFun) { - // Skip inter-procedural aliases - const auto *AliasFun = GetFunction(Alias); - if (FactFun != AliasFun && AliasFun) { - return; - } - } - if (Fact == Alias) { - WithAlias(Alias); - return; - } - - const auto *AliasBase = Alias->stripPointerCastsAndAliases(); - if (MustNoalias(Base, AliasBase)) { - return; - } - - // bool IsMatching = false; - // auto Res = PT.alias(Fact, Alias, At); - - // if constexpr (MustAlias) { - // IsMatching = Res == psr::AliasResult::MustAlias; - // } else { - // IsMatching = Res != psr::AliasResult::NoAlias; - // } - // if (IsMatching) { - // WithAlias(Alias); - // } - WithAlias(Alias); - - if (const auto *Load = llvm::dyn_cast(Alias)) { - WithAlias(Load->getPointerOperand()); - } - }); -} - -auto FilteredAliasSet::getAliasSet(d_t Val, n_t At) -> container_type { - container_type Ret; - foreachFilteredAliasSetImpl( - Val, At, [&Ret](d_t Alias) { Ret.insert(Alias); }, PT); - return Ret; -} -// auto FilteredAliasSet::getMustAliasSet(d_t Val, n_t At) -> container_type { -// 
container_type Ret; -// foreachFilteredAliasSetImpl( -// Val, At, [&Ret](d_t Alias) { Ret.insert(Alias); }, PT); -// return Ret; -// } - -void FilteredAliasSet::foreachAlias(d_t Fact, n_t At, - llvm::function_ref WithAlias) { - foreachFilteredAliasSetImpl(Fact, At, WithAlias, PT); -} -// void FilteredAliasSet::foreachdMustAlias( -// d_t Fact, n_t At, llvm::function_ref WithAlias) { -// foreachFilteredAliasSetImpl(Fact, At, WithAlias, PT); -// } -} // namespace psr diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 6b51b99607..e43e209a1a 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -9,111 +9,100 @@ #include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/AdjacencyList.h" +#include "phasar/Utils/EmptyBaseOptimizationUtils.h" +#include "phasar/Utils/GraphTraits.h" + #include "gtest/gtest.h" -#include #include -#include //===----------------------------------------------------------------------===// // Unit tests for the Igeneric SCC algorithm using namespace psr; -using SCCId = analysis::call_graph::SCCId; -enum class [[clang::enum_extensibility(open)]] NodeId : uint32_t{}; - -class ExampleGraph { -public: - using GraphNodeId = NodeId; +enum class NodeId : uint32_t {}; +using SCCId = SCCId; - ExampleGraph() = default; - - [[nodiscard]] std::vector - getEdges(const GraphNodeId Vertex) const { - return Adj[uint32_t(Vertex)]; - } - std::vector> Adj; -}; +using ExampleGraph = AdjacencyList; static void computeSCCsAndCompare(ExampleGraph &Graph) { - auto OutputRec = analysis::call_graph::execTarjan(Graph, false); - auto OutputIt = analysis::call_graph::execTarjan(Graph, true); + auto OutputRec = computeSCCs(Graph); + auto OutputIt = computeSCCIterative(Graph); ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) << "Iterative Approach did not reach all nodes\n"; ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) << "Recursive Approach did not reach all nodes\n"; - 
EXPECT_EQ(OutputRec.NumSCCs, OutputIt.NumSCCs) + EXPECT_EQ(OutputRec.size(), OutputIt.size()) << "Unequal number of SCC components\n"; /*std::cout << std::to_string(OutputRec.NumSCCs) << " " << std::to_string(OutputIt.NumSCCs) << "\n";*/ - for (size_t ID = 0; ID < Graph.Adj.size(); ID++) { - EXPECT_EQ(OutputRec.SCCOfNode[ID], OutputIt.SCCOfNode[ID]) - << "SCCs differ at Index: " << std::to_string(ID) << "\n"; + for (auto Vtx : GraphTraits::vertices(Graph)) { + EXPECT_EQ(OutputRec.SCCOfNode[Vtx], OutputIt.SCCOfNode[Vtx]) + << "SCCs differ at Index: " << uint32_t(Vtx) << "\n"; } } TEST(SCCGenericTest, SCCTest) { - using GraphNodeId = ExampleGraph::GraphNodeId; - ExampleGraph GraphOne{{{GraphNodeId(2)}, - {GraphNodeId(0)}, - {GraphNodeId(1)}, - {GraphNodeId(1), GraphNodeId(2)}, - {GraphNodeId(1)}, - {GraphNodeId(4), GraphNodeId(6)}, - {GraphNodeId(4), GraphNodeId(7)}, - {GraphNodeId(5)}}}; + ExampleGraph GraphOne{{{NodeId(2)}, + {NodeId(0)}, + {NodeId(1)}, + {NodeId(1), NodeId(2)}, + {NodeId(1)}, + {NodeId(4), NodeId(6)}, + {NodeId(4), NodeId(7)}, + {NodeId(5)}}}; ExampleGraph GraphTwo{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}; - ExampleGraph GraphThree{{{GraphNodeId(1)}, - {GraphNodeId(2)}, - {GraphNodeId(3)}, - {GraphNodeId(4)}, - {GraphNodeId(5)}, - {GraphNodeId(6)}, - {GraphNodeId(0)}}}; - - ExampleGraph GraphFour{ - {{GraphNodeId(1), GraphNodeId(2), GraphNodeId(3), GraphNodeId(4)}, - {GraphNodeId(0), GraphNodeId(2), GraphNodeId(3), GraphNodeId(4)}, - {GraphNodeId(0), GraphNodeId(1), GraphNodeId(3), GraphNodeId(4)}, - {GraphNodeId(0), GraphNodeId(1), GraphNodeId(2), GraphNodeId(4)}, - {GraphNodeId(0), GraphNodeId(1), GraphNodeId(2), GraphNodeId(3)}}}; - - ExampleGraph GraphFive{{{GraphNodeId(1)}, - {GraphNodeId(2)}, - {GraphNodeId(3), GraphNodeId(4)}, - {GraphNodeId(5)}, - {GraphNodeId(5)}, - {GraphNodeId(2), GraphNodeId(6)}, - {GraphNodeId(7)}, - {GraphNodeId(1), GraphNodeId(8)}, + ExampleGraph GraphThree{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, 
+ {NodeId(5)}, + {NodeId(6)}, + {NodeId(0)}}}; + + ExampleGraph GraphFour{{{NodeId(1), NodeId(2), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(2), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(2), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(2), NodeId(3)}}}; + + ExampleGraph GraphFive{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3), NodeId(4)}, + {NodeId(5)}, + {NodeId(5)}, + {NodeId(2), NodeId(6)}, + {NodeId(7)}, + {NodeId(1), NodeId(8)}, {}}}; - ExampleGraph GraphSix{{{GraphNodeId(1)}, - {GraphNodeId(2)}, - {GraphNodeId(3)}, - {GraphNodeId(4)}, - {GraphNodeId(5)}, - {GraphNodeId(6)}, - {GraphNodeId(7)}, - {GraphNodeId(0)}, - {GraphNodeId(9)}, - {GraphNodeId(10)}, - {GraphNodeId(11)}, - {GraphNodeId(12)}, - {GraphNodeId(13), GraphNodeId(4)}, - {GraphNodeId(8)}, - {GraphNodeId(9)}, - {GraphNodeId(3)}, - {GraphNodeId(5)}}}; + ExampleGraph GraphSix{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(7)}, + {NodeId(0)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13), NodeId(4)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(3)}, + {NodeId(5)}}}; std::vector TestGraphs = {GraphOne, GraphTwo, GraphThree, GraphFour, GraphFive, GraphSix}; - for (size_t Index = 0; Index < TestGraphs.size(); Index++) { - computeSCCsAndCompare(TestGraphs[Index]); + for (auto &TestGraph : TestGraphs) { + computeSCCsAndCompare(TestGraph); } /*auto OutputRec = analysis::call_graph::execTarjan(Graph, false); diff --git a/unittests/Utils/VTACallGraphTest.cpp b/unittests/Utils/VTACallGraphTest.cpp index 8449231b01..40ef7aaf0a 100644 --- a/unittests/Utils/VTACallGraphTest.cpp +++ b/unittests/Utils/VTACallGraphTest.cpp @@ -10,7 +10,7 @@ // #include "phasar/AnalysisConfig.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" -#include "phasar/PhasarLLVM/ControlFlow/call_graph.h" +#include 
"phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" From 8fd252458fdbecf01c410dd37887ed7798d170ed Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 2 Sep 2025 19:32:24 +0200 Subject: [PATCH 13/27] Update TAG to llvm::DIType instead of llvm::Type + fix VTACallGraphTest with opaque pointers --- .../ControlFlow/Resolver/VTAResolver.h | 60 +++++ .../ControlFlow/VTA/TypeAssignmentGraph.h | 77 +++++- .../ControlFlow/VTA/TypePropagator.h | 5 +- .../PhasarLLVM/ControlFlow/VTA/call_graph.h | 31 --- .../TypeHierarchy/DIBasedTypeHierarchy.h | 6 +- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 2 + include/phasar/Utils/SCCGeneric.h | 2 +- .../ControlFlow/LLVMVFTableProvider.cpp | 15 +- .../ControlFlow/Resolver/Resolver.cpp | 14 +- .../ControlFlow/Resolver/VTAResolver.cpp | 137 +++++++++++ .../ControlFlow/VTA/TypeAssignmentGraph.cpp | 230 ++++++++++-------- .../ControlFlow/VTA/TypePropagator.cpp | 15 +- lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 25 ++ .../PhasarLLVM/ControlFlow/CMakeLists.txt | 1 + .../ControlFlow}/VTACallGraphTest.cpp | 92 ++++--- unittests/Utils/CMakeLists.txt | 1 - 16 files changed, 512 insertions(+), 201 deletions(-) create mode 100644 include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h delete mode 100644 include/phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h create mode 100644 lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp rename unittests/{Utils => PhasarLLVM/ControlFlow}/VTACallGraphTest.cpp (69%) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h new file mode 100644 index 0000000000..c16174b0ee --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -0,0 +1,60 @@ +/****************************************************************************** + * Copyright 
(c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/ADT/STLExtras.h" + +namespace psr { +class VTAResolver : public Resolver { +public: + explicit VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, + const LLVMBasedCallGraph *BaseCG, vta::AliasInfoTy AS); + explicit VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, + const LLVMBasedCallGraph *BaseCG, LLVMAliasInfoRef AS) + : VTAResolver(IRDB, VTP, BaseCG, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }) {} + + [[nodiscard]] std::string str() const override; + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + +private: + void resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override; + + void resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override; + + const LLVMBasedCallGraph *BaseCG{}; + vta::TypeAssignment TA{}; + SCCHolder SCCs{}; + Compressor Nodes; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H diff --git 
a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index f4426a8883..505e753d75 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -15,12 +15,17 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/Utils/Compressor.h" +#include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -37,7 +42,7 @@ struct Variable { }; struct Field { - const llvm::Type *Base; + const llvm::DIType *Base; size_t ByteOffset; }; @@ -112,22 +117,23 @@ namespace psr::vta { struct TypeAssignmentGraph { using GraphNodeId = TAGNodeId; + using TypeInfoTy = + llvm::PointerUnion; Compressor Nodes; - llvm::SmallVector, 0> Adj; - llvm::SmallDenseMap> + TypedVector> Adj; + llvm::SmallDenseMap> TypeEntryPoints; - [[nodiscard]] inline std::optional - get(TAGNode TN) const noexcept { + [[nodiscard]] inline std::optional get(TAGNode TN) const noexcept { return Nodes.getOrNull(TN); } - [[nodiscard]] inline TAGNode operator[](GraphNodeId Id) const noexcept { + [[nodiscard]] inline TAGNode operator[](TAGNodeId Id) const noexcept { return Nodes[Id]; } - inline void addEdge(GraphNodeId From, GraphNodeId To) { + inline void addEdge(TAGNodeId From, TAGNodeId To) { assert(size_t(From) < Adj.size()); assert(size_t(To) < Adj.size()); @@ -135,7 +141,7 @@ struct TypeAssignmentGraph { return; } - Adj[size_t(From)].insert(To); + 
Adj[From].insert(To); } void print(llvm::raw_ostream &OS); @@ -155,4 +161,57 @@ using AliasInfoTy = llvm::function_ref struct GraphTraits { + using graph_type = vta::TypeAssignmentGraph; + using value_type = vta::TAGNode; + using vertex_t = vta::TAGNodeId; + using edge_t = vertex_t; + + static constexpr vertex_t Invalid = vertex_t(UINT32_MAX); + + [[nodiscard]] static const auto &outEdges(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + return G.Adj[Vtx]; + } + + [[nodiscard]] static const auto &nodes(const graph_type &G) noexcept { + return G.Nodes; + } + + [[nodiscard]] static auto roots(const graph_type &G) noexcept { + return llvm::make_first_range(G.TypeEntryPoints); + } + + [[nodiscard]] static auto vertices(const graph_type &G) noexcept { + return iota(G.Adj.size()); + } + + [[nodiscard]] static value_type node(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + assert(G.Adj.size() == G.Nodes.size()); + return G.Nodes[Vtx]; + } + + [[nodiscard]] static size_t size(const graph_type &G) noexcept { + assert(G.Adj.size() == G.Nodes.size()); + return G.Adj.size(); + } + + [[nodiscard]] static size_t + roots_size(const graph_type &G) noexcept { // NOLINT + return G.TypeEntryPoints.size(); + } + + [[nodiscard]] static vertex_t target(edge_t Edge) noexcept { return Edge; } + + [[nodiscard]] static vertex_t withEdgeTarget(edge_t /*Edge*/, + vertex_t NewTgt) noexcept { + return NewTgt; + } +}; +} // namespace psr + #endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h index 680e8f238d..34d7085f02 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h @@ -14,6 +14,7 @@ #include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -32,7 +33,9 @@ 
struct TypeAssignmentGraph; enum class TAGNodeId : uint32_t; struct TypeAssignment { - TypedVector, llvm::SmallDenseSet> + TypedVector, + llvm::SmallDenseSet>> TypesPerSCC; void print(llvm::raw_ostream &OS, const TypeAssignmentGraph &TAG, diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h deleted file mode 100644 index ffc265862f..0000000000 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h +++ /dev/null @@ -1,31 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and others - *****************************************************************************/ - -#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_CALL_GRAPH_H -#define PHASAR_PHASARLLVM_CONTROLFLOW_CALL_GRAPH_H - -#include "phasar/ControlFlow/CallGraph.h" -#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" -#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" - -#include "llvm/IR/Module.h" - -namespace psr { - -[[nodiscard]] psr::CallGraph -computeVTACallgraph(const llvm::Module &Mod, - const psr::CallGraph &BaseCG, - psr::LLVMAliasInfoRef AS, - const psr::LLVMVFTableProvider &VTP); - -} // namespace psr - -#endif diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h index fccd023f85..e4001bbd25 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h @@ -92,7 +92,7 @@ class DIBasedTypeHierarchy [[nodiscard]] const auto &getAllVTables() const noexcept { return VTables; } - [[nodiscard]] llvm::StringRef getTypeName(ClassType Type) const override { + 
[[nodiscard]] static llvm::StringRef typeName(ClassType Type) { if (const auto *CompTy = llvm::dyn_cast(Type)) { auto Ident = CompTy->getIdentifier(); return Ident.empty() ? CompTy->getName() : Ident; @@ -100,6 +100,10 @@ class DIBasedTypeHierarchy return Type->getName(); } + [[nodiscard]] llvm::StringRef getTypeName(ClassType Type) const override { + return typeName(Type); + } + [[nodiscard]] size_t size() const noexcept override { return VertexTypes.size(); } diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index f4fad2d4cb..53641286ed 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -103,6 +103,8 @@ std::string llvmIRToShortString(const llvm::Value *V); */ [[nodiscard]] std::string llvmTypeToString(const llvm::Type *Ty, bool Shorten = false); +[[nodiscard]] std::string llvmTypeToString(const llvm::DIType *Ty, + bool Shorten = false); LLVM_DUMP_METHOD void dumpIRValue(const llvm::Value *V); LLVM_DUMP_METHOD void dumpIRValue(const llvm::Instruction *V); diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 26d773b1e6..d148711060 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -419,7 +419,7 @@ SCCDependencyGraph::vertex_t> computeSCCDependencies( } } - Ret.SCCRoots.reserve(Roots.count()); + Ret.SCCRoots.reserve(Roots.size()); for (auto Rt : Roots) { Ret.SCCRoots.push_back(Rt); } diff --git a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp index 67246c938e..82c2f9ec34 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMVFTableProvider.cpp @@ -4,6 +4,7 @@ #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMVFTable.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" +#include 
"phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/MapUtils.h" #include "llvm/ADT/StringRef.h" @@ -25,20 +26,18 @@ static std::string getTypeName(const llvm::DIType *DITy) { auto TypeName = [DITy] { if (const auto *CompTy = llvm::dyn_cast(DITy)) { if (auto Ident = CompTy->getIdentifier(); !Ident.empty()) { - return Ident; + // In LLVM 17 demangle() takes a StringRef + return llvm::demangle(Ident.str()); } } - return DITy->getName(); + return llvmTypeToString(DITy, true); }(); - // In LLVM 17 demangle() takes a StringRef - auto Ret = llvm::demangle(TypeName.str()); - - if (llvm::StringRef(Ret).startswith(TSPrefixDemang)) { - Ret.erase(0, TSPrefixDemang.size()); + if (llvm::StringRef(TypeName).startswith(TSPrefixDemang)) { + TypeName.erase(0, TSPrefixDemang.size()); } - return Ret; + return TypeName; } static void insertVirtualFunctions( diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index f97fc8c1f1..5b916bcd1b 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -144,15 +144,23 @@ bool psr::isVirtualCall(const llvm::Instruction *Inst, // check potential receiver type const auto *RecType = getReceiverType(CallSite); if (!RecType) { - llvm::errs() << "No receiver type found for call at " - << llvmIRToString(Inst) << '\n'; + // llvm::errs() << "No receiver type found for call at " + // << llvmIRToString(Inst) << '\n'; return false; } if (!VTP.hasVFTable(RecType)) { + // llvm::errs() << "Receiver type has no vtable: " << + // llvmTypeToString(RecType) + // << " for call at " << llvmIRToString(Inst) << '\n'; return false; } - return getVFTIndex(CallSite) >= 0; + auto Idx = getVFTIndex(CallSite); + // llvm::errs() << "Retrieved Vtable index is: " << Idx << " for receiver-type + // " + // << llvmTypeToString(RecType) << " for call at " + // << llvmIRToString(Inst) << '\n'; + return Idx >= 0; } namespace psr { diff --git 
a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp new file mode 100644 index 0000000000..6f8611dd60 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -0,0 +1,137 @@ +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" + +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/InstrTypes.h" + +using namespace psr; + +VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, + const LLVMBasedCallGraph *BaseCG, vta::AliasInfoTy AS) + : Resolver(IRDB, VTP), BaseCG(BaseCG) { + assert(BaseCG != nullptr); + + auto TAG = + vta::computeTypeAssignmentGraph(*IRDB->getModule(), *BaseCG, AS, *VTP); + + SCCs = computeSCCs(TAG); + auto Deps = computeSCCDependencies(TAG, SCCs); + auto Order = computeSCCOrder(SCCs, Deps); + TA = vta::propagateTypes(TAG, SCCs, Deps, Order); + + // TAG.print(llvm::errs()); + // TA.print(llvm::errs(), TAG, SCCs); + + Nodes = std::move(TAG.Nodes); +} + +std::string VTAResolver::str() const { return "VTA"; } + +void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) { + + // llvm::errs() << "[resolveVirtualCall] At " << llvmIRToString(CallSite) + // << '\n'; + + // TODO: Use getVFTIndexAndVT(), once #785 is merged + auto RetrievedVtableIndex = getVFTIndex(CallSite); + if (!RetrievedVtableIndex.has_value()) { + // An error occurred + PHASAR_LOG_LEVEL(DEBUG, + "Error with resolveVirtualCall : impossible to retrieve " + "the vtable index\n" + << llvmIRToString(CallSite) << "\n"); + return; + } + + auto *VT =
CallSite->getCalledOperand()->stripPointerCastsAndAliases(); + auto VtableIndex = RetrievedVtableIndex.value(); + + auto BaseCalleesVec = BaseCG->getCalleesOfCallAt(CallSite); + llvm::SmallDenseSet BaseCallees( + BaseCalleesVec.begin(), BaseCalleesVec.end()); + + auto ReceiverIdx = CallSite->hasStructRetAttr(); + if (CallSite->arg_size() > ReceiverIdx) { + const auto *Receiver = CallSite->getArgOperand(ReceiverIdx); + if (auto ReceiverNod = Nodes.getOrNull({vta::Variable{Receiver}})) { + auto SCC = SCCs.SCCOfNode[*ReceiverNod]; + const auto *ReceiverType = getReceiverType(CallSite); + + const auto &Types = TA.TypesPerSCC[SCC]; + for (auto Ty : Types) { + if (const auto *DITy = Ty.dyn_cast()) { + if (const auto *Fun = getNonPureVirtualVFTEntry( + DITy, VtableIndex, CallSite, ReceiverType)) { + if (psr::isConsistentCall(CallSite, Fun) && + (BaseCallees.empty() || BaseCallees.contains(Fun))) { + // llvm::errs() << " Add possible target " << Fun->getName() + // << " through vtable lookup at index " << + // VtableIndex + // << " on type " << llvmTypeToString(DITy) << '\n'; + PossibleTargets.insert(Fun); + } + } + } + } + } + } + + auto TNId = Nodes.getOrNull({vta::Variable{VT}}); + if (TNId) { + auto SCC = SCCs.SCCOfNode[*TNId]; + const auto &Types = TA.TypesPerSCC[SCC]; + for (auto Ty : Types) { + if (const auto *Fun = Ty.dyn_cast()) { + if (psr::isConsistentCall(CallSite, Fun) && + (BaseCallees.empty() || BaseCallees.contains(Fun))) { + // llvm::errs() << " Add possible target " << Fun->getName() + // << " through direct function pointer\n"; + PossibleTargets.insert(Fun); + } + } + } + } + + if (PossibleTargets.empty()) { + PossibleTargets = std::move(BaseCallees); + } +} + +void VTAResolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) { + // llvm::errs() << "[resolveFunctionPointer] At " << llvmIRToString(CallSite) + // << '\n'; + + auto BaseCalleesVec = BaseCG->getCalleesOfCallAt(CallSite); + llvm::SmallDenseSet 
BaseCallees( + BaseCalleesVec.begin(), BaseCalleesVec.end()); + + auto TNId = Nodes.getOrNull({vta::Variable{ + CallSite->getCalledOperand()->stripPointerCastsAndAliases()}}); + if (TNId) { + auto SCC = SCCs.SCCOfNode[*TNId]; + const auto &Types = TA.TypesPerSCC[SCC]; + for (auto Ty : Types) { + if (const auto *Fun = Ty.dyn_cast()) { + if (psr::isConsistentCall(CallSite, Fun) && + (BaseCallees.empty() || BaseCallees.contains(Fun))) { + // llvm::errs() << " Add possible target " << Fun->getName() + // << " through direct function pointer\n"; + PossibleTargets.insert(Fun); + } + } + } + } + + if (PossibleTargets.empty()) { + PossibleTargets = std::move(BaseCallees); + } +} diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index faeffa233a..1d18b4175e 100644 --- a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -23,7 +23,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -32,6 +34,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" @@ -39,7 +42,7 @@ #include #include -#include +#include #include using namespace psr; @@ -65,31 +68,53 @@ void vta::printNode(llvm::raw_ostream &OS, TAGNode TN) { std::visit([&OS](auto Nod) { printNodeImpl(OS, Nod); }, TN.Label); } +static const llvm::DIType *stripMemberAndTypedef(const llvm::DIType *Ty) { + while (const auto *DerivedTy = llvm::dyn_cast(Ty)) { + if (DerivedTy->getTag() == llvm::dwarf::DW_TAG_typedef || + DerivedTy->getTag() == 
llvm::dwarf::DW_TAG_member) { + Ty = DerivedTy->getBaseType(); + continue; + } + break; + } + return Ty; +} + +static bool isPointerTy(const llvm::DIType *Ty) { + if (const auto *DerivedTy = + llvm::dyn_cast(stripMemberAndTypedef(Ty))) { + return DerivedTy->getTag() == llvm::dwarf::DW_TAG_pointer_type || + DerivedTy->getTag() == llvm::dwarf::DW_TAG_reference_type; + } + return false; +} + +static const llvm::DICompositeType *isCompositeTy(const llvm::DIType *Ty) { + return llvm::dyn_cast(stripMemberAndTypedef(Ty)); +} + static llvm::SmallBitVector -getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { +getPointerIndicesOfType(llvm::DICompositeType *Ty, const llvm::DataLayout &DL) { llvm::SmallBitVector Ret; - auto PointerSize = DL.getPointerSize(); - // LOGS("[getPointerIndicesOfType]: " << *Ty ); - auto MaxNumPointers = - !Ty->isSized() ? 1 : DL.getTypeAllocSize(Ty) / PointerSize; + auto PointerSize = DL.getPointerSizeInBits(); + + // TODO: Does every type provide a meaningful getSizeInBits? 
+ auto MaxNumPointers = Ty->getSizeInBits() / PointerSize; if (!MaxNumPointers) { return Ret; } Ret.resize(MaxNumPointers); - llvm::SmallVector> WorkList = {{Ty, 0}}; + llvm::SmallVector> WorkList = {{Ty, 0}}; while (!WorkList.empty()) { - auto [CurrTy, CurrByteOffs] = WorkList.pop_back_val(); + auto [CurrTy, CurrBitOffs] = WorkList.pop_back_val(); - if (CurrTy->isPointerTy()) { - size_t Idx = CurrByteOffs / PointerSize; - if (CurrByteOffs % PointerSize) [[unlikely]] { + if (isPointerTy(CurrTy)) { + size_t Idx = CurrBitOffs / PointerSize; + if (CurrBitOffs % PointerSize) [[unlikely]] { PHASAR_LOG_LEVEL(WARNING, "Unaligned pointer.."); - /*llvm::errs() << "[WARNING][getPointerIndicesOfType]: Unaligned pointer - " "found at offset " - << CurrByteOffs << " in type " << *Ty;*/ } assert(Ret.size() > Idx && "reserved unsufficient space for pointer indices"); @@ -97,24 +122,57 @@ getPointerIndicesOfType(llvm::Type *Ty, const llvm::DataLayout &DL) { continue; } - if (CurrTy->isArrayTy()) { - auto *ElemTy = CurrTy->getArrayElementType(); - auto ArrayLen = CurrTy->getArrayNumElements(); - auto ElemSize = DL.getTypeAllocSize(ElemTy); - for (size_t I = 0, Offs = CurrByteOffs; I < ArrayLen; - ++I, Offs += ElemSize) { - WorkList.emplace_back(ElemTy, Offs); + const auto *CompTy = isCompositeTy(CurrTy); + if (!CompTy) { + continue; + } + + auto Tag = CompTy->getTag(); + + if (Tag == llvm::dwarf::DW_TAG_array_type) { + auto *ElemTy = CompTy->getBaseType(); + const auto *ArrayLenRange = + llvm::cast(CompTy->getElements()[0]); + auto ArrayLenBound = ArrayLenRange->getCount(); + if (const auto *ArrayLenCInt = + ArrayLenBound.dyn_cast()) { + auto ArrayLen = ArrayLenCInt->getSExtValue(); + // Count is -1 for flexible array members; + if (ArrayLen < 0) { + continue; + } + + auto ElemSize = int64_t(ElemTy->getSizeInBits()); + for (int64_t I = 0, Offs = CurrBitOffs; I < ArrayLen; + ++I, Offs += ElemSize) { + WorkList.emplace_back(ElemTy, Offs); + } } + continue; } - if (auto *Struct = 
llvm::dyn_cast(CurrTy)) { - auto NumElems = Struct->getNumElements(); - const auto *SL = DL.getStructLayout(Struct); - for (size_t I = 0; I < NumElems; ++I) { - auto Offs = CurrByteOffs + SL->getElementOffset(I); - WorkList.emplace_back(Struct->getElementType(I), Offs); + if (Tag == llvm::dwarf::DW_TAG_structure_type || + Tag == llvm::dwarf::DW_TAG_class_type) { + + auto Elems = CompTy->getElements(); + uint64_t Offs = CurrBitOffs; + for (auto *Elem : Elems) { + auto *ElemTy = llvm::dyn_cast(Elem); + if (!ElemTy) { + continue; + } + + scope_exit IncOffs = [&] { Offs += ElemTy->getSizeInBits(); }; + + if (Elem->getTag() != llvm::dwarf::DW_TAG_inheritance && + Elem->getTag() != llvm::dwarf::DW_TAG_member) { + continue; + } + + WorkList.emplace_back(ElemTy, Offs); } + continue; } } @@ -133,12 +191,17 @@ static void addFields(const llvm::Module &Mod, TypeAssignmentGraph &TAG, size_t PointerSize = DL.getPointerSize(); - for (auto *ST : Structs) { - auto Offsets = getPointerIndicesOfType(ST, DL); - for (auto Offs : Offsets.set_bits()) { - addTAGNode({Field{ST, Offs * PointerSize}}, TAG); + llvm::DebugInfoFinder DIF; + DIF.processModule(Mod); + + for (auto *DITy : DIF.types()) { + if (auto *CompTy = llvm::dyn_cast(DITy)) { + auto Offsets = getPointerIndicesOfType(CompTy, DL); + for (auto Offs : Offsets.set_bits()) { + addTAGNode({Field{CompTy, Offs * PointerSize}}, TAG); + } + addTAGNode({Field{CompTy, SIZE_MAX}}, TAG); } - addTAGNode({Field{ST, SIZE_MAX}}, TAG); } } @@ -200,22 +263,13 @@ static void initializeWithFun(const llvm::Function *Fun, } } -[[nodiscard]] static bool isVTableOrFun(const llvm::Value *Val) { - const auto *Base = Val->stripPointerCastsAndAliases(); - if (llvm::isa(Base)) { - return true; - } - - if (const auto *Glob = llvm::dyn_cast(Base)) { - return Glob->isConstant() && Glob->getName().startswith("_ZTV"); - } - - return false; -} - static void handleAlloca(const llvm::AllocaInst *Alloca, TypeAssignmentGraph &TAG, - const psr::LLVMVFTableProvider 
&VTP) { + const psr::LLVMVFTableProvider & /*VTP*/) { + if (Alloca->getAllocatedType()->isPointerTy()) { + return; + } + auto TN = TAG.get({Variable{Alloca}}); if (!TN) { return; @@ -226,9 +280,7 @@ static void handleAlloca(const llvm::AllocaInst *Alloca, return; } - if (const auto *TV = VTP.getVFTableGlobal(AllocTy)) { - TAG.TypeEntryPoints[*TN].insert(TV); - } + TAG.TypeEntryPoints[*TN].insert(AllocTy); } static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, @@ -242,7 +294,12 @@ static std::optional getGEPNode(const llvm::GetElementPtrInst *GEP, return SIZE_MAX; }(); - return TAG.get({Field{GEP->getSourceElementType(), Offs}}); + auto *VarTy = getVarTypeFromIR(GEP); + if (!VarTy) { + return std::nullopt; + } + + return TAG.get({Field{VarTy, Offs}}); } static void handleGEP(const llvm::GetElementPtrInst *GEP, @@ -272,10 +329,10 @@ static void handleGEP(const llvm::GetElementPtrInst *GEP, static bool handleEntryForStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, AliasInfoTy AI, const llvm::DataLayout &DL) { - const auto *Base = Store->getValueOperand()->stripPointerCastsAndAliases(); - bool IsEntry = isVTableOrFun(Base); + const auto *Base = llvm::dyn_cast( + Store->getValueOperand()->stripPointerCastsAndAliases()); - if (!IsEntry) { + if (!Base) { return false; } @@ -382,46 +439,10 @@ static void handlePhi(const llvm::PHINode *Phi, TypeAssignmentGraph &TAG) { } } -static const llvm::Value *getTypeFromDI(const llvm::DICompositeType *CompTy, - const llvm::Module &Mod, - const psr::LLVMVFTableProvider &VTP) { - if (!CompTy->getIdentifier().empty()) { - - std::string Buf; - auto TypeName = CompTy->getIdentifier(); - if (TypeName.startswith("_ZTS") || TypeName.startswith("_ZTI")) { - Buf = TypeName.str(); - Buf[3] = 'V'; - TypeName = Buf; - } - - if (const auto *GlobTV = Mod.getNamedGlobal(TypeName)) { - return GlobTV; - } - if (const auto *Alias = Mod.getNamedAlias(TypeName)) { - return Alias->getAliasee()->stripPointerCastsAndAliases(); - } 
- - return nullptr; - } - - // TODO: With latest changes from f-TestingAPIChanges, we don't need the below - // loop! - auto ClearName = CompTy->getName().str(); - const auto *Scope = CompTy->getScope(); - while (llvm::isa_and_nonnull(Scope)) { - ClearName = Scope->getName().str().append("::").append(ClearName); - Scope = Scope->getScope(); - } - - return VTP.getVFTableGlobal(ClearName); -} - static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, TypeAssignmentGraph &TAG, const llvm::Function *Callee, - const psr::LLVMVFTableProvider &VTP) { + const psr::LLVMVFTableProvider & /*VTP*/) { if (!psr::isHeapAllocatingFunction(Callee)) { return; @@ -434,11 +455,7 @@ static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { - if (const auto *Ty = getTypeFromDI(CompTy, *Call->getModule(), VTP)) { - - TAG.TypeEntryPoints[CSNod].insert(Ty); - return; - } + TAG.TypeEntryPoints[CSNod].insert(CompTy); } } } @@ -459,7 +476,8 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, HasArgNode = true; } - bool IsEntry = isVTableOrFun(Arg.get()); + bool IsEntry = + llvm::isa(Arg.get()->stripPointerCastsAndAliases()); EntryArgs.push_back(IsEntry); } @@ -481,8 +499,8 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, if (EntryArgs.test(Param.getArgNo())) { TAG.TypeEntryPoints[*ParamNodId].insert( - Call->getArgOperand(Param.getArgNo()) - ->stripPointerCastsAndAliases()); + llvm::cast(Call->getArgOperand(Param.getArgNo()) + ->stripPointerCastsAndAliases())); } if (!Arg) { @@ -515,8 +533,8 @@ static void handleReturn(const llvm::ReturnInst *Ret, if (const auto *RetVal = Ret->getReturnValue()) { const auto *Base = RetVal->stripPointerCastsAndAliases(); - if (isVTableOrFun(Base)) { - TAG.TypeEntryPoints[*TNId].insert(Base); + if (const auto *RetFun = llvm::dyn_cast(Base)) { + 
TAG.TypeEntryPoints[*TNId].insert(RetFun); return; } @@ -532,6 +550,10 @@ static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, const llvm::Function *> &BaseCG, AliasInfoTy AI, const llvm::DataLayout &DL, const psr::LLVMVFTableProvider &VTP) { + if (llvm::isa(&I)) { + return; + } + if (const auto *Alloca = llvm::dyn_cast(&I)) { handleAlloca(Alloca, TAG, VTP); return; @@ -595,14 +617,14 @@ static auto computeTypeAssignmentGraphImpl( addFields(Mod, TAG, DL); addGlobals(Mod, TAG); - for (const auto &Fun : Mod) { - initializeWithFun(&Fun, TAG); + for (const auto *Fun : BaseCG.getAllVertexFunctions()) { + initializeWithFun(Fun, TAG); } TAG.Adj.resize(TAG.Nodes.size()); - for (const auto &Fun : Mod) { - buildTAGWithFun(&Fun, TAG, BaseCG, AI, DL, VTP); + for (const auto *Fun : BaseCG.getAllVertexFunctions()) { + buildTAGWithFun(Fun, TAG, BaseCG, AI, DL, VTP); } return TAG; diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp index 3e6b1894e3..8be4122f43 100644 --- a/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypePropagator.cpp @@ -14,6 +14,8 @@ #include "phasar/Utils/Compressor.h" #include "phasar/Utils/SCCGeneric.h" +#include "llvm/IR/DebugInfoMetadata.h" + using namespace psr; using namespace psr::vta; @@ -59,12 +61,17 @@ void TypeAssignment::print(llvm::raw_ostream &OS, OS << "digraph TypeAssignment {\n"; psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - Compressor Types; - auto GetOrAddType = [&](const llvm::Value *Ty) { + Compressor Types; + auto GetOrAddType = [&](TypeAssignmentGraph::TypeInfoTy Ty) { auto [Id, Inserted] = Types.insert(Ty); if (Inserted) { OS << (size_t(Id) + SCCs.size()) << "[label=\""; - OS.write_escaped(Ty->getName()); + if (const auto *Fun = Ty.dyn_cast()) { + OS << "fun-" << Fun->getName(); + } else if (const auto *DITy = Ty.dyn_cast()) { + OS << "type-"; + OS.write_escaped(llvmTypeToString(DITy, true)); + } OS << 
"\"];\n"; } return Id + SCCs.size(); @@ -79,7 +86,7 @@ void TypeAssignment::print(llvm::raw_ostream &OS, } OS << "\"];\n"; - for (const auto *Ty : TypesPerSCC[Ctr]) { + for (auto Ty : TypesPerSCC[Ctr]) { auto TyId = GetOrAddType(Ty); OS << uint32_t(Ctr) << "->" << TyId << ";\n"; } diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index 18cb20c97e..700a51b434 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -25,6 +25,7 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" @@ -234,6 +235,30 @@ std::string psr::llvmTypeToString(const llvm::Type *Ty, bool Shorten) { Ty->print(RSO, false, Shorten); return IRBuffer; } +std::string psr::llvmTypeToString(const llvm::DIType *Ty, bool Shorten) { + if (!Ty) { + return ""; + } + + std::string Ret; + + if (Shorten) { + Ret = Ty->getName().str(); + if (!Ret.empty()) { + const auto *Scope = Ty->getScope(); + while (llvm::isa_and_nonnull(Scope)) { + Ret = Scope->getName().str().append("::").append(Ret); + Scope = Scope->getScope(); + } + return Ret; + } + } + + llvm::raw_string_ostream RSO(Ret); + Ty->print(RSO); + return Ret; +} void psr::dumpIRValue(const llvm::Value *V) { llvm::outs() << llvmIRToString(V) << '\n'; diff --git a/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt b/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt index 95d3b00e67..20c6350dbb 100644 --- a/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt +++ b/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt @@ -11,6 +11,7 @@ set(ControlFlowSources LLVMBasedICFGGlobCtorDtorTest.cpp LLVMBasedICFGSerializationTest.cpp LLVMVFTableProviderTest.cpp + VTACallGraphTest.cpp ) set(LLVM_LINK_COMPONENTS Linker) # The CtorDtorTest needs the linker diff --git 
a/unittests/Utils/VTACallGraphTest.cpp b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp similarity index 69% rename from unittests/Utils/VTACallGraphTest.cpp rename to unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp index 40ef7aaf0a..ffee4f2b8e 100644 --- a/unittests/Utils/VTACallGraphTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp @@ -8,10 +8,13 @@ *****************************************************************************/ // #include "phasar/AnalysisConfig.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" -#include "phasar/PhasarLLVM/ControlFlow/VTA/call_graph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" @@ -22,6 +25,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" +#include "TestConfig.h" #include "gtest/gtest.h" #include @@ -34,13 +38,8 @@ namespace { OS << " }"; return Ret; } -///////////////////////////// -psr::LLVMBasedICFG createBaseCG(psr::LLVMProjectIRDB &IRDB, - const psr::LLVMVFTableProvider &VTP, - const psr::DIBasedTypeHierarchy &TH, - psr::LLVMAliasInfoRef /*PT*/) { - psr::RTAResolver Res(&IRDB, &VTP, &TH); +std::vector getEntryPoints(const psr::LLVMProjectIRDB &IRDB) { std::vector EntryPoints; /////////////////////////////////// if (IRDB.getFunctionDefinition("main")) { @@ -52,14 +51,31 @@ psr::LLVMBasedICFG createBaseCG(psr::LLVMProjectIRDB &IRDB, } } } + return EntryPoints; +} +///////////////////////////// +psr::LLVMBasedCallGraph createBaseCG(psr::LLVMProjectIRDB &IRDB, + const 
psr::LLVMVFTableProvider &VTP, + const psr::DIBasedTypeHierarchy &TH, + psr::LLVMAliasInfoRef /*PT*/) { + psr::RTAResolver Res(&IRDB, &VTP, &TH); + /////////////////////////////////// - return psr::LLVMBasedICFG(&IRDB, Res, EntryPoints, psr::Soundness::Soundy); + return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB), + psr::Soundness::Soundy); +} + +psr::LLVMBasedCallGraph computeVTACallGraph( + psr::LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, + psr::LLVMAliasInfoRef AS, const psr::LLVMBasedCallGraph &BaseCG) { + psr::VTAResolver Res(&IRDB, &VTP, &BaseCG, AS); + return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB)); } + ////////////////////////////// class VTACallGraphTest : public ::testing::Test { protected: - static constexpr llvm::StringLiteral PathToLLFiles = - "/build/phasar/test/llvm_test_code/"; + static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER(""); struct GroundTruthEntry { size_t CSId; @@ -70,22 +86,22 @@ class VTACallGraphTest : public ::testing::Test { llvm::ArrayRef GT) { ASSERT_FALSE(GT.empty()) << "No Ground-Truth provided!"; - auto IRDB = std::make_unique(PathToLLFiles + IRFile); - ASSERT_TRUE(IRDB->isValid()); + auto IRDB = psr::LLVMProjectIRDB(PathToLLFiles + IRFile); + ASSERT_TRUE(IRDB.isValid()); - psr::LLVMVFTableProvider VTP(*IRDB); - psr::DIBasedTypeHierarchy TH(*IRDB); - psr::LLVMAliasSet AS(IRDB.get()); + psr::LLVMVFTableProvider VTP(IRDB); + psr::DIBasedTypeHierarchy TH(IRDB); + psr::LLVMAliasSet AS(&IRDB); // implement function locally - auto BaseCG = createBaseCG(*IRDB, VTP, TH, &AS); + auto BaseCG = createBaseCG(IRDB, VTP, TH, &AS); - auto CG = psr::computeVTACallgraph(*IRDB->getModule(), - BaseCG.getCallGraph(), &AS, VTP); + auto CG = computeVTACallGraph(IRDB, VTP, &AS, BaseCG); for (const auto &Entry : GT) { - const auto *CS = IRDB->getInstruction(Entry.CSId); + const auto *CS = IRDB.getInstruction(Entry.CSId); ASSERT_NE(nullptr, CS); - ASSERT_TRUE(llvm::isa(CS)); + 
ASSERT_TRUE(llvm::isa(CS)) + << "CS " << psr::llvmIRToString(CS) << " is no call-site!"; auto &&Callees = CG.getCalleesOfCallAt(CS); EXPECT_EQ(Entry.Callees.size(), Callees.size()); @@ -107,9 +123,9 @@ class VTACallGraphTest : public ::testing::Test { }; TEST_F(VTACallGraphTest, VirtualCallSite_InterProcCallSite) { - doAnalysisAndCompareResults("virtual_callsites/interproc_callsite_cpp.ll", + doAnalysisAndCompareResults("virtual_callsites/interproc_callsite_cpp_dbg.ll", { - {16, {"_ZN7Derived3barEv"}}, + {17, {"_ZN7Derived3barEv"}}, }); } @@ -125,8 +141,8 @@ TEST_F(VTACallGraphTest, PathTracing_Inter12) { doAnalysisAndCompareResults( "path_tracing/inter_12_cpp_dbg.ll", { - {33, {"_ZN3TwoD0Ev", "_ZN5ThreeD0Ev"}}, - {45, {"_ZN5Three11assignValueEi", "_ZN3Two11assignValueEi"}}, + {30, {"_ZN3TwoD0Ev", "_ZN5ThreeD0Ev"}}, + {39, {"_ZN5Three11assignValueEi", "_ZN3Two11assignValueEi"}}, }); } @@ -151,7 +167,7 @@ TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer3) { }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall2) { - doAnalysisAndCompareResults("call_graphs/virtual_call_2_cpp.ll", + doAnalysisAndCompareResults("call_graphs/virtual_call_2_cpp_dbg.ll", { {20, {"_ZN1B3fooEv"}}, }); @@ -160,12 +176,12 @@ TEST_F(VTACallGraphTest, CallGraphs_VirtualCall3) { // Use the dbg version, because VTA relies on !heapallocsite metadata doAnalysisAndCompareResults("call_graphs/virtual_call_3_cpp_dbg.ll", { - {22, {"_ZN5AImpl3fooEv"}}, - {30, {"_ZN5AImplD0Ev"}}, + {19, {"_ZN5AImpl3fooEv"}}, + {26, {"_ZN5AImplD0Ev"}}, }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall4) { - doAnalysisAndCompareResults("call_graphs/virtual_call_4_cpp.ll", + doAnalysisAndCompareResults("call_graphs/virtual_call_4_cpp_dbg.ll", { {20, {"_ZN1B3fooEv"}}, }); @@ -174,28 +190,28 @@ TEST_F(VTACallGraphTest, CallGraphs_VirtualCall5) { // Use the dbg version, because VTA relies on !heapallocsite metadata doAnalysisAndCompareResults("call_graphs/virtual_call_5_cpp_dbg.ll", { - {24, {"_ZN1B5VfuncEv"}}, - {32, 
{"_ZN1BD0Ev"}}, + {21, {"_ZN1B5VfuncEv"}}, + {28, {"_ZN1BD0Ev"}}, }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall7) { // Use the dbg version, because VTA relies on !heapallocsite metadata doAnalysisAndCompareResults("call_graphs/virtual_call_7_cpp_dbg.ll", { - {28, {"_ZN1A5VfuncEv"}}, - {34, {"_ZN1B5VfuncEv"}}, - {42, {"_ZN1AD0Ev"}}, + {24, {"_ZN1A5VfuncEv"}}, + {29, {"_ZN1B5VfuncEv"}}, + {36, {"_ZN1AD0Ev"}}, }); } -TEST_F(VTACallGraphTest, DISABLED_CallGraphs_VirtualCall8) { +TEST_F(VTACallGraphTest, CallGraphs_VirtualCall8) { // Use the dbg version, because VTA relies on !heapallocsite metadata // Note: The VTA analysis is neither flow-, nor context-sensitive doAnalysisAndCompareResults( "call_graphs/virtual_call_8_cpp_dbg.ll", { - {26, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, - {32, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + {22, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + {27, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall9) { @@ -204,8 +220,8 @@ TEST_F(VTACallGraphTest, CallGraphs_VirtualCall9) { doAnalysisAndCompareResults( "call_graphs/virtual_call_9_cpp_dbg.ll", { - {85, {"_ZN1B3fooEv", "_ZN1C3fooEv", "_ZN1D3fooEv"}}, - {93, {"_ZN1BD0Ev", "_ZN1CD0Ev", "_ZN1DD0Ev"}}, + {72, {"_ZN1B3fooEv", "_ZN1C3fooEv", "_ZN1D3fooEv"}}, + {79, {"_ZN1BD0Ev", "_ZN1CD0Ev", "_ZN1DD0Ev"}}, }); } // TODO: More tests! 
diff --git a/unittests/Utils/CMakeLists.txt b/unittests/Utils/CMakeLists.txt index 06c6717dca..d84306e783 100644 --- a/unittests/Utils/CMakeLists.txt +++ b/unittests/Utils/CMakeLists.txt @@ -11,7 +11,6 @@ set(UtilsSources OnTheFlyAnalysisPrinterTest.cpp SourceMgrPrinterTest.cpp SCCGenericTest.cpp - VTACallGraphTest.cpp ) if(PHASAR_ENABLE_DYNAMIC_LOG) From 02ffadef526f503a75454a048e5f87fbbf21d768 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 3 Sep 2025 14:50:35 +0200 Subject: [PATCH 14/27] Add call-graph tool to generate call-graphs for arbitrary LLVM IR files + compute CG statistics --- cmake/phasar_macros.cmake | 6 +- .../ControlFlow/EntryFunctionUtils.h | 3 + .../ControlFlow/Resolver/VTAResolver.h | 11 +- .../phasar/PhasarLLVM/DB/LLVMProjectIRDB.h | 4 + include/phasar/Utils/AlignNum.h | 65 +++++ .../ControlFlow/EntryFunctionUtils.cpp | 12 + .../ControlFlow/Resolver/VTAResolver.cpp | 11 +- lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp | 21 ++ .../Passes/GeneralStatisticsAnalysis.cpp | 32 +-- tools/CMakeLists.txt | 41 ++- tools/call-graph/call-graph.cpp | 272 ++++++++++++++++++ tools/example-tool/CMakeLists.txt | 18 -- tools/hello-modules-tool/CMakeLists.txt | 18 -- tools/phasar-cli/CMakeLists.txt | 33 --- tools/phasar-cli/Controller/CMakeLists.txt | 3 - 15 files changed, 431 insertions(+), 119 deletions(-) create mode 100644 include/phasar/Utils/AlignNum.h create mode 100644 tools/call-graph/call-graph.cpp delete mode 100644 tools/example-tool/CMakeLists.txt delete mode 100644 tools/hello-modules-tool/CMakeLists.txt delete mode 100644 tools/phasar-cli/CMakeLists.txt delete mode 100644 tools/phasar-cli/Controller/CMakeLists.txt diff --git a/cmake/phasar_macros.cmake b/cmake/phasar_macros.cmake index 2bd952852c..e250bf10cb 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -353,7 +353,7 @@ function(add_phasar_library name) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) endfunction(add_phasar_library) -macro(subdirlist result 
curdir) +function(subdirlist result curdir) file(GLOB children RELATIVE ${curdir} ${curdir}/*) set(dirlist "") @@ -363,5 +363,5 @@ macro(subdirlist result curdir) endif() endforeach() - set(${result} ${dirlist}) -endmacro(subdirlist) + set(${result} ${dirlist} PARENT_SCOPE) +endfunction(subdirlist) diff --git a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h index 165bc20229..78003dcd9b 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h +++ b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h @@ -26,6 +26,9 @@ getEntryFunctions(const LLVMProjectIRDB &IRDB, [[nodiscard]] std::vector getEntryFunctionsMut(LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints); + +[[nodiscard]] std::vector +getDefaultEntryPoints(const LLVMProjectIRDB &IRDB); } // namespace psr #endif // PHASAR_PHASARLLVM_UTILS_ENTRYFUNCTIONUTILS_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index c16174b0ee..2ad7acf6f3 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -16,6 +16,7 @@ #include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" +#include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/SCCGeneric.h" #include "llvm/ADT/STLExtras.h" @@ -25,11 +26,13 @@ class VTAResolver : public Resolver { public: explicit VTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const LLVMBasedCallGraph *BaseCG, vta::AliasInfoTy AS); + MaybeUniquePtr BaseCG, + vta::AliasInfoTy AS); explicit VTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const LLVMBasedCallGraph *BaseCG, LLVMAliasInfoRef AS) - : VTAResolver(IRDB, VTP, BaseCG, + MaybeUniquePtr BaseCG, + LLVMAliasInfoRef AS) + : 
VTAResolver(IRDB, VTP, std::move(BaseCG), [AS](const llvm::Value *Ptr, const llvm::Instruction *At, vta::AliasHandlerTy WithAlias) { auto ASet = AS.getAliasSet(Ptr, At); @@ -50,7 +53,7 @@ class VTAResolver : public Resolver { void resolveFunctionPointer(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) override; - const LLVMBasedCallGraph *BaseCG{}; + MaybeUniquePtr BaseCG{}; vta::TypeAssignment TA{}; SCCHolder SCCs{}; Compressor Nodes; diff --git a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h index 03b6c03ea8..c7adb60f88 100644 --- a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h +++ b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h @@ -100,6 +100,10 @@ class LLVMProjectIRDB : public ProjectIRDBBase { [[nodiscard]] static llvm::ErrorOr load(const llvm::Twine &IRFileName, bool EnableOpaquePointers = LLVM_VERSION_MAJOR > 14); + [[nodiscard]] static LLVMProjectIRDB + loadOrExit(const llvm::Twine &IRFileName, + bool EnableOpaquePointers = LLVM_VERSION_MAJOR > 14, + int ErrorExitCode = 1); /// Also use the const overload using ProjectIRDBBase::getFunction; diff --git a/include/phasar/Utils/AlignNum.h b/include/phasar/Utils/AlignNum.h new file mode 100644 index 0000000000..152a8f824a --- /dev/null +++ b/include/phasar/Utils/AlignNum.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel + *****************************************************************************/ + +#ifndef PHASAR_UTILS_ALIGNNUM_H +#define PHASAR_UTILS_ALIGNNUM_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" + +namespace psr { + +template struct AlignNum { + llvm::StringRef Name; + T Num; + + constexpr AlignNum(llvm::StringRef Name, T Num) noexcept + : Name(Name), Num(Num) {} + constexpr AlignNum(llvm::StringRef Name, size_t Numerator, + size_t Denominator) noexcept + : Name(Name), Num(double(Numerator) / double(Denominator)) {} + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const AlignNum &AN) { + auto Len = AN.Name.size() + 1; + auto Diff = -(Len < NumOffs) & (NumOffs - Len); + + OS << AN.Name << ':'; + // Default is two fixed-point decimal places, so shift the output by three + // spaces + OS.indent(Diff + std::is_floating_point_v * 3); + OS << llvm::formatv("{0,+7}\n", AN.Num); + + return OS; + } +}; +template AlignNum(llvm::StringRef, T) -> AlignNum; +AlignNum(llvm::StringRef, size_t, size_t) -> AlignNum; + +template struct AlignStr { + llvm::StringRef Name; + llvm::StringRef Value; + + constexpr AlignStr(llvm::StringRef Name, llvm::StringRef Value) noexcept + : Name(Name), Value(Value) {} + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const AlignStr &AS) { + auto Len = AS.Name.size(); + auto Diff = -(Len < NumOffs) & (NumOffs - Len); + + OS << AS.Name << ':'; + OS.indent(Diff); + return OS << AS.Value << '\n'; + } +}; +} // namespace psr + +#endif // PHASAR_UTILS_ALIGNNUM_H diff --git a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp index 06986f9c2d..13d41123fd 100644 --- a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp +++ b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp @@ -1,5 +1,6 @@ #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include 
"phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/Utils/Logger.h" @@ -66,3 +67,14 @@ psr::getEntryFunctionsMut(LLVMProjectIRDB &IRDB, } return UserEntryPointFns; } + +std::vector +psr::getDefaultEntryPoints(const LLVMProjectIRDB &IRDB) { + if (IRDB.getFunctionDefinition(GlobalCtorsDtorsModel::ModelName)) { + return {GlobalCtorsDtorsModel::DtorModelName.str()}; + } + if (IRDB.getFunctionDefinition("main")) { + return {"main"}; + } + return {"__ALL__"}; +} diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp index 6f8611dd60..198645fbc7 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -14,12 +14,13 @@ using namespace psr; VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const LLVMBasedCallGraph *BaseCG, vta::AliasInfoTy AS) - : Resolver(IRDB, VTP), BaseCG(BaseCG) { - assert(BaseCG != nullptr); + MaybeUniquePtr BaseCG, + vta::AliasInfoTy AS) + : Resolver(IRDB, VTP), BaseCG(std::move(BaseCG)) { + assert(this->BaseCG != nullptr); - auto TAG = - vta::computeTypeAssignmentGraph(*IRDB->getModule(), *BaseCG, AS, *VTP); + auto TAG = vta::computeTypeAssignmentGraph(*IRDB->getModule(), *this->BaseCG, + AS, *VTP); SCCs = computeSCCs(TAG); auto Deps = computeSCCDependencies(TAG, SCCs); diff --git a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp index 27c35db533..77a1c6af76 100644 --- a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp +++ b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/WithColor.h" #include #include @@ -155,6 +156,20 @@ LLVMProjectIRDB::load(const llvm::Twine &IRFileName, return LLVMProjectIRDB(std::move(*M), std::move(Ctx), 
EnableOpaquePointers); } +LLVMProjectIRDB LLVMProjectIRDB::loadOrExit(const llvm::Twine &IRFileName, + bool EnableOpaquePointers, + int ErrorExitCode) { + auto Ret = load(IRFileName, EnableOpaquePointers); + if (!Ret) { + llvm::WithColor::error() + << "Could not load LLVM-" << LLVM_VERSION_MAJOR << " IR file " + << IRFileName << ": " << Ret.getError().message() << '\n'; + std::exit(ErrorExitCode); + } + + return std::move(*Ret); +} + LLVMProjectIRDB::LLVMProjectIRDB(const llvm::Twine &IRFileName, bool EnableOpaquePointers) : Ctx(new llvm::LLVMContext()) { @@ -162,6 +177,9 @@ LLVMProjectIRDB::LLVMProjectIRDB(const llvm::Twine &IRFileName, auto M = getParsedIRModuleOrErr(IRFileName, *Ctx); if (!M) { + llvm::WithColor::error() + << "Could not load LLVM-" << LLVM_VERSION_MAJOR << " IR file " + << IRFileName << ": " << M.getError().message() << '\n'; return; } @@ -264,6 +282,9 @@ LLVMProjectIRDB::LLVMProjectIRDB(llvm::MemoryBufferRef Buf, setOpaquePointersForCtx(*Ctx, EnableOpaquePointers); auto M = getParsedIRModuleOrErr(Buf, *Ctx); if (!M) { + llvm::WithColor::error() << "Could not load " << LLVM_VERSION_MAJOR + << " IR buffer: " << Buf.getBufferIdentifier() + << ": " << M.getError().message() << '\n'; return; } diff --git a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp index 4575fa59ea..8d2d32ab4d 100644 --- a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp +++ b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp @@ -10,6 +10,7 @@ #include "phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/AlignNum.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/NlohmannLogging.h" #include "phasar/Utils/PAMMMacros.h" @@ -23,11 +24,9 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include -#include namespace psr 
{ @@ -297,35 +296,6 @@ void GeneralStatistics::printAsJson(llvm::raw_ostream &OS) const { } // namespace psr -namespace { -template struct AlignNum { - llvm::StringRef Name; - T Num; - - AlignNum(llvm::StringRef Name, T Num) noexcept : Name(Name), Num(Num) {} - AlignNum(llvm::StringRef Name, size_t Numerator, size_t Denominator) noexcept - : Name(Name), Num(double(Numerator) / double(Denominator)) {} - - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const AlignNum &AN) { - static constexpr size_t NumOffs = 32; - - auto Len = AN.Name.size() + 1; - auto Diff = -(Len < NumOffs) & (NumOffs - Len); - - OS << AN.Name << ':'; - // Default is two fixed-point decimal places, so shift the output by three - // spaces - OS.indent(Diff + std::is_floating_point_v * 3); - OS << llvm::formatv("{0,+7}\n", AN.Num); - - return OS; - } -}; -template AlignNum(llvm::StringRef, T) -> AlignNum; -AlignNum(llvm::StringRef, size_t, size_t)->AlignNum; -} // namespace - llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, const GeneralStatistics &Statistics) { return OS diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index a1b50268ee..0cd9236476 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,5 +1,38 @@ -add_subdirectory(example-tool) -add_subdirectory(phasar-cli) -if (PHASAR_BUILD_MODULES) - add_subdirectory(hello-modules-tool) + +subdirlist(subdirs ${CMAKE_CURRENT_SOURCE_DIR}) +if (NOT PHASAR_BUILD_MODULES) + list(REMOVE_ITEM subdirs hello-modules-tool) +endif() + +foreach(tool ${subdirs}) + message(STATUS "Set-up phasar-tool: ${tool}") + + file(GLOB_RECURSE tool_src "${tool}/*.cpp" "${tool}/*.h") + if(PHASAR_IN_TREE) + add_phasar_executable(${tool} + ${tool_src} + ) + else() + add_executable(${tool} + ${tool_src} + ) + endif() + + set_target_properties(${tool} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${tool} + ) + + target_link_libraries(${tool} + PRIVATE + phasar + ${PHASAR_STD_FILESYSTEM} + ) +endforeach() + 
+set_target_properties(example-tool PROPERTIES + OUTPUT_NAME "myphasartool" +) + +if (NOT PHASAR_IN_TREE) + install(TARGETS phasar-cli) endif() diff --git a/tools/call-graph/call-graph.cpp b/tools/call-graph/call-graph.cpp new file mode 100644 index 0000000000..98b3bb8f48 --- /dev/null +++ b/tools/call-graph/call-graph.cpp @@ -0,0 +1,272 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/Pointer/AliasAnalysisType.h" +#include "phasar/Utils/AlignNum.h" + +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace cl = llvm::cl; + +static cl::OptionCategory CGCat("PhASAR CallGraph"); + +static cl::opt EmitCGAsDot( + "emit-cg-as-dot", + 
cl::desc("Output the computed call-graph as DOT graph that can be " + "displayed with any graphviz viewer (default: true)"), + cl::init(true), cl::cat(CGCat)); + +static cl::opt EmitCGAsJson( + "emit-cg-as-json", + cl::desc("Output the computed call-graph as JSON (default: false)"), + cl::cat(CGCat)); + +static cl::opt + OutputFile("o", + cl::desc("The file-path, where the output should be written to " + "(default: stdout)"), + cl::init("-"), cl::cat(CGCat)); + +static cl::opt + CGType("cg-type", cl::desc("The call-graph analysis type to use"), + cl::ValuesClass{ +#define CALL_GRAPH_ANALYSIS_TYPE(NAME, CMDFLAG, DESC) \ + clEnumValN(psr::CallGraphAnalysisType::NAME, CMDFLAG, DESC), +#include "phasar/ControlFlow/CallGraphAnalysisType.def" + }, + cl::init(psr::CallGraphAnalysisType::OTF), cl::cat(CGCat)); + +static cl::opt + AAType("aa-type", + cl::desc("The alias-analysis type for those call-graph " + "algorithms that require alias information"), + cl::ValuesClass{ +#define ALIAS_ANALYSIS_TYPE(NAME, CMDFLAG, DESC) \ + clEnumValN(psr::AliasAnalysisType::NAME, CMDFLAG, DESC), +#include "phasar/Pointer/AliasAnalysisType.def" + }, + cl::init(psr::AliasAnalysisType::CFLAnders), cl::cat(CGCat)); + +static cl::opt + EmitStats("S", cl::desc("Compute statistics on the computed call-graph"), + cl::cat(CGCat)); + +static cl::opt IRFile(cl::Positional, cl::Required, + cl::desc("The LLVM IR file to analyze"), + cl::cat(CGCat)); + +static void computeCGStats(const psr::LLVMBasedCallGraph &CG, + llvm::raw_ostream &OS); + +int main(int Argc, char *Argv[]) { + cl::HideUnrelatedOptions(CGCat); + cl::ParseCommandLineOptions(Argc, Argv); + + auto IRDB = psr::LLVMProjectIRDB::loadOrExit(IRFile); + auto VTP = psr::LLVMVFTableProvider(IRDB); + auto TH = psr::DIBasedTypeHierarchy(IRDB); + auto EntryPoints = psr::getDefaultEntryPoints(IRDB); + + auto CG = [&] { + switch (CGType) { + case psr::CallGraphAnalysisType::NORESOLVE: + case psr::CallGraphAnalysisType::CHA: + case 
psr::CallGraphAnalysisType::RTA: { + auto Res = psr::Resolver::create(CGType, &IRDB, &VTP, &TH); + return psr::buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints); + } + case psr::CallGraphAnalysisType::VTA: { + auto BaseRes = psr::RTAResolver(&IRDB, &VTP, &TH); + auto BaseCG = psr::buildLLVMBasedCallGraph(IRDB, BaseRes, EntryPoints); + auto AA = psr::LLVMAliasSet(&IRDB, true, AAType); + auto Res = psr::VTAResolver(&IRDB, &VTP, &BaseCG, &AA); + return psr::buildLLVMBasedCallGraph(IRDB, Res, EntryPoints); + } + case psr::CallGraphAnalysisType::OTF: { + auto AA = psr::LLVMAliasSet(&IRDB, true, AAType); + auto Res = psr::OTFResolver(&IRDB, &VTP, &AA); + return psr::buildLLVMBasedCallGraph(IRDB, Res, EntryPoints); + } + case psr::CallGraphAnalysisType::Invalid: + llvm::report_fatal_error("Invalid call-graph analysis type"); + } + }(); + + std::optional OS; + const auto GetOS = [&OS]() -> llvm::raw_ostream & { + if (!OS) { + std::error_code EC; + OS.emplace(OutputFile, EC); + if (EC) { + llvm::WithColor::error() + << "Could not open output-file: " << EC.message() << '\n'; + std::exit(1); + } + } + return *OS; + }; + + auto ICF = psr::LLVMBasedICFG(std::move(CG), &IRDB); + + if (EmitCGAsDot) { + ICF.print(GetOS()); + } + if (EmitCGAsJson) { + ICF.printAsJson(GetOS()); + } + if (EmitStats) { + computeCGStats(ICF.getCallGraph(), GetOS()); + } +} + +static constexpr unsigned Indent = 48; + +template struct Align : psr::AlignNum { + using psr::AlignNum::AlignNum; +}; +template Align(llvm::StringRef, T) -> Align; +Align(llvm::StringRef, size_t, size_t) -> Align; + +using AlignS = psr::AlignStr; + +static void computeCGStats(const psr::LLVMBasedCallGraph &CG, + llvm::raw_ostream &OS) { + size_t NumVtxFuns = CG.getNumVertexFunctions(); + size_t NumVtxCS = CG.getNumVertexCallSites(); + + size_t NumIndCalls = 0; + size_t NumCallEdges = 0; + size_t NumIndCallEdges = 0; + + size_t NumIndCSWith0Callees = 0; + size_t NumIndCSWith1Callees = 0; + size_t NumIndCSWith2Callees = 0; + size_t 
NumIndCSWithGreater2Callees = 0; + size_t NumIndCSWithGreater5Callees = 0; + size_t NumIndCSWithGreater10Callees = 0; + size_t NumIndCSWithGreater20Callees = 0; + size_t NumIndCSWithGreater50Callees = 0; + size_t NumIndCSWithGreater100Callees = 0; + size_t LargestFanOut = 0; + + std::vector NumCallEdgesPerCS; + std::vector NumCallEdgesPerIndCS; + NumCallEdgesPerCS.reserve(NumVtxCS); + NumCallEdgesPerIndCS.reserve(NumVtxCS); + + for (const auto *CS : CG.getAllVertexCallSites()) { + bool IsIndCall = + !llvm::isa(llvm::cast(CS) + ->getCalledOperand() + ->stripPointerCastsAndAliases()); + + auto Callees = CG.getCalleesOfCallAt(CS); + NumIndCalls += IsIndCall; + NumCallEdges += Callees.size(); + NumIndCallEdges += Callees.size() * IsIndCall; + NumCallEdgesPerCS.push_back(Callees.size()); + if (IsIndCall) { + NumCallEdgesPerIndCS.push_back(Callees.size()); + } + if (Callees.size() > LargestFanOut) { + LargestFanOut = Callees.size(); + } + + NumIndCSWith0Callees += Callees.empty(); + NumIndCSWith1Callees += Callees.size() == 1 && IsIndCall; + NumIndCSWith2Callees += Callees.size() == 2; + NumIndCSWithGreater2Callees += Callees.size() > 2; + NumIndCSWithGreater5Callees += Callees.size() > 5; + NumIndCSWithGreater10Callees += Callees.size() > 10; + NumIndCSWithGreater20Callees += Callees.size() > 20; + NumIndCSWithGreater50Callees += Callees.size() > 50; + NumIndCSWithGreater100Callees += Callees.size() > 100; + } + + llvm::sort(NumCallEdgesPerCS); + llvm::sort(NumCallEdgesPerIndCS); + + OS << "================== CallGraph Statistics ==================\n"; + + OS << Align("Num vertex functions", NumVtxFuns); + OS << Align("Num call-sites", NumVtxCS); + OS << Align("Num call-edges", NumCallEdges); + if (NumCallEdgesPerCS.empty()) { + OS << AlignS("Avg num call-edges per call-site", ""); + OS << AlignS("Med num call-edges per call-site", ""); + OS << AlignS("90% num call-edges per call-site", ""); + } else { + OS << Align("Avg num call-edges per call-site", + 
double(NumCallEdges) / double(NumVtxCS)); + OS << Align("Med num call-edges per call-site", + NumCallEdgesPerCS[NumCallEdgesPerCS.size() / 2]); + OS << Align( + "90% num call-edges per call-site", + NumCallEdgesPerCS[size_t(double(NumCallEdgesPerCS.size()) * 0.9)]); + } + OS << '\n'; + OS << Align("Num indirect call-sites", NumIndCalls); + OS << Align("Num indirect call-edges", NumIndCallEdges); + + if (NumCallEdgesPerIndCS.empty()) { + OS << AlignS("Avg num call-edges per indirect call-site", ""); + OS << AlignS("Med num call-edges per indirect call-site", ""); + OS << AlignS("90% num call-edges per indirect call-site", ""); + } else { + OS << Align("Avg num call-edges per indirect call-site", + double(NumIndCallEdges) / double(NumIndCalls)); + OS << Align("Med num call-edges per indirect call-site", + NumCallEdgesPerIndCS[NumCallEdgesPerIndCS.size() / 2]); + OS << Align("90% num call-edges per indirect call-site", + NumCallEdgesPerIndCS[size_t( + double(NumCallEdgesPerIndCS.size()) * 0.9)]); + } + OS << Align("Largest fanout (max num callees per call-site)", LargestFanOut); + + OS << '\n'; + OS << Align("Num indirect calls with 0 resolved callees", + NumIndCSWith0Callees); + OS << Align("Num indirect calls with 1 resolved callee", + NumIndCSWith1Callees); + OS << Align("Num indirect calls with 2 resolved callees", + NumIndCSWith2Callees); + OS << Align("Num indirect calls with > 2 resolved callees", + NumIndCSWithGreater2Callees); + OS << Align("Num indirect calls with > 5 resolved callees", + NumIndCSWithGreater5Callees); + OS << Align("Num indirect calls with > 10 resolved callees", + NumIndCSWithGreater10Callees); + OS << Align("Num indirect calls with > 20 resolved callees", + NumIndCSWithGreater20Callees); + OS << Align("Num indirect calls with > 50 resolved callees", + NumIndCSWithGreater50Callees); + OS << Align("Num indirect calls with >100 resolved callees", + NumIndCSWithGreater100Callees); +} diff --git a/tools/example-tool/CMakeLists.txt 
b/tools/example-tool/CMakeLists.txt deleted file mode 100644 index 2a2d547661..0000000000 --- a/tools/example-tool/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Build a stand-alone executable -if(PHASAR_IN_TREE) - # Build a small test tool to show how phasar may be used - add_phasar_executable(myphasartool - myphasartool.cpp - ) -else() - # Build a small test tool to show how phasar may be used - add_executable(myphasartool - myphasartool.cpp - ) -endif() - -target_link_libraries(myphasartool - PRIVATE - phasar - ${PHASAR_STD_FILESYSTEM} -) diff --git a/tools/hello-modules-tool/CMakeLists.txt b/tools/hello-modules-tool/CMakeLists.txt deleted file mode 100644 index f38f50030e..0000000000 --- a/tools/hello-modules-tool/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Build a stand-alone executable -if(PHASAR_IN_TREE) - # Build a small test tool to show how phasar may be used - add_phasar_executable(hello-modules - hello_modules.cpp - ) -else() - # Build a small test tool to show how phasar may be used - add_executable(hello-modules - hello_modules.cpp - ) -endif() - -target_link_libraries(hello-modules - PRIVATE - phasar - ${PHASAR_STD_FILESYSTEM} -) diff --git a/tools/phasar-cli/CMakeLists.txt b/tools/phasar-cli/CMakeLists.txt deleted file mode 100644 index f25ce28f7c..0000000000 --- a/tools/phasar-cli/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Analysis - BitWriter - Core - Demangle - IRReader - Linker - Passes - Support -) - -# Build a stand-alone executable -if(PHASAR_IN_TREE) - add_phasar_executable(phasar-cli - phasar-cli.cpp - ) -else() - add_executable(phasar-cli - phasar-cli.cpp - ) -endif() - -add_subdirectory(Controller) - -target_link_libraries(phasar-cli - PRIVATE - phasar - ${PHASAR_STD_FILESYSTEM} -) - -if (NOT PHASAR_IN_TREE) - install(TARGETS phasar-cli) -endif() diff --git a/tools/phasar-cli/Controller/CMakeLists.txt b/tools/phasar-cli/Controller/CMakeLists.txt deleted file mode 100644 index 5977a5aefc..0000000000 
--- a/tools/phasar-cli/Controller/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -file(GLOB_RECURSE CONTROLLER_SRC *.h *.cpp) - -target_sources(phasar-cli PRIVATE ${CONTROLLER_SRC}) From f038eaf1ad1702debabc83d1971d1a435ee443f4 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 5 Sep 2025 13:57:30 +0200 Subject: [PATCH 15/27] Add resolver-based TAG construction --- .../Resolver/PrecomputedResolver.h | 51 ++++++++++ .../ControlFlow/Resolver/Resolver.h | 24 ++++- .../ControlFlow/Resolver/VTAResolver.h | 56 ++++++++--- .../ControlFlow/VTA/TypeAssignmentGraph.h | 18 ++-- .../ControlFlow/LLVMBasedCallGraphBuilder.cpp | 3 +- .../Resolver/PrecomputedResolver.cpp | 20 ++++ .../ControlFlow/Resolver/Resolver.cpp | 31 +++++-- .../ControlFlow/Resolver/VTAResolver.cpp | 52 ++++++++--- .../ControlFlow/VTA/TypeAssignmentGraph.cpp | 93 ++++++++++--------- tools/call-graph/call-graph.cpp | 22 ++++- .../ControlFlow/VTACallGraphTest.cpp | 2 +- 11 files changed, 280 insertions(+), 92 deletions(-) create mode 100644 include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h create mode 100644 lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h new file mode 100644 index 0000000000..04304d6c28 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_PRECOMPUTEDRESOLVER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_PRECOMPUTEDRESOLVER_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/Utils/MaybeUniquePtr.h" + +namespace psr { +/// \brief A Resolver that uses a pre-computed call-graph to resolve indirect +/// calls. +/// +/// \note We eventually may want the LLVMBasedCallGraph to *be* a Resolver. This +/// requires the concept of resolvers to generalize beyond LLVM. See +/// for +/// reference +class PrecomputedResolver : public Resolver { +public: + PrecomputedResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, + MaybeUniquePtr BaseCG); + + [[nodiscard]] bool + mutatesHelperAnalysisInformation() const noexcept override { + return false; + } + + void resolveVirtualCall(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override { + resolveFunctionPointer(PossibleTargets, CallSite); + } + + void resolveFunctionPointer(FunctionSetTy &PossibleTargets, + const llvm::CallBase *CallSite) override; + + [[nodiscard]] std::string str() const override; + +private: + MaybeUniquePtr BaseCG; +}; +} // namespace psr + +#endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 09b8147424..0ade24735e 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -17,9 +17,12 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/MaybeUniquePtr.h" 
#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/IR/DerivedTypes.h" #include @@ -115,11 +118,22 @@ class Resolver { // Conservatively returns true. Override if possible return true; } - static std::unique_ptr create(CallGraphAnalysisType Ty, - const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT = nullptr); + + struct DefaultBaseResolverProvider { + MaybeUniquePtr operator()(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, + const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT); + }; + + static std::unique_ptr + create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT = nullptr, + llvm::function_ref( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT)> + GetBaseRes = DefaultBaseResolverProvider{}); protected: virtual void resolveVirtualCall(FunctionSetTy &PossibleTargets, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index 2ad7acf6f3..a93d5f5ce0 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -10,34 +10,60 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" -#include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/SCCGeneric.h" #include 
"llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" namespace psr { class VTAResolver : public Resolver { public: + struct DefaultReachableFunctions { + void operator()(const LLVMProjectIRDB &IRDB, + llvm::function_ref WithFun); + }; + explicit VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - MaybeUniquePtr BaseCG, - vta::AliasInfoTy AS); + const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, + MaybeUniquePtr BaseCG); explicit VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - MaybeUniquePtr BaseCG, - LLVMAliasInfoRef AS) - : VTAResolver(IRDB, VTP, std::move(BaseCG), - [AS](const llvm::Value *Ptr, const llvm::Instruction *At, - vta::AliasHandlerTy WithAlias) { - auto ASet = AS.getAliasSet(Ptr, At); - llvm::for_each(*ASet, WithAlias); - }) {} + const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, + MaybeUniquePtr BaseCG) + : VTAResolver( + IRDB, VTP, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }, + std::move(BaseCG)) {} + + explicit VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions = DefaultReachableFunctions{}); + explicit VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions = DefaultReachableFunctions{}) + : VTAResolver( + IRDB, VTP, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }, + std::move(BaseRes), ReachableFunctions) {} [[nodiscard]] std::string str() const override; @@ -53,7 +79,7 @@ class VTAResolver : public Resolver { void resolveFunctionPointer(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) 
override; - MaybeUniquePtr BaseCG{}; + MaybeUniquePtr BaseCG{}; vta::TypeAssignment TA{}; SCCHolder SCCs{}; Compressor Nodes; diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index 505e753d75..fa32b8f3c3 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -11,9 +11,7 @@ #define PHASAR_PHASARLLVM_CONTROLFLOW_TYPEASSIGNMENTGRAPH_H #include "phasar/ControlFlow/CallGraph.h" -#include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/GraphTraits.h" #include "phasar/Utils/IotaIterator.h" @@ -33,6 +31,12 @@ #include #include +namespace psr { +class Resolver; +class LLVMProjectIRDB; +class LLVMVFTableProvider; +} // namespace psr + namespace psr::vta { enum class TAGNodeId : uint32_t {}; @@ -151,12 +155,14 @@ using AliasHandlerTy = llvm::function_ref; using AliasInfoTy = llvm::function_ref; +using ReachableFunsHandlerTy = llvm::function_ref; +using ReachableFunsTy = + llvm::function_ref; + // TODO: Use AliasIterator here, once available [[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - AliasInfoTy AS, const psr::LLVMVFTableProvider &VTP); + const LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, + AliasInfoTy AS, Resolver &BaseRes, ReachableFunsTy ReachableFunctions); void printNode(llvm::raw_ostream &OS, TAGNode TN); }; // namespace psr::vta diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index e3f65c00ec..8a2477d88f 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ 
b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -3,6 +3,7 @@ #include "phasar/ControlFlow/CallGraphAnalysisType.h" #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" @@ -275,7 +276,7 @@ auto psr::buildLLVMBasedCallGraph( PT = PTOwn.asRef(); } - auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH); + auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH, PT); return buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints, S); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp new file mode 100644 index 0000000000..d249a40346 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.cpp @@ -0,0 +1,20 @@ +#include "phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h" + +#include "llvm/IR/InstrTypes.h" + +using namespace psr; + +PrecomputedResolver::PrecomputedResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + MaybeUniquePtr BaseCG) + : Resolver(IRDB, VTP), BaseCG(std::move(BaseCG)) { + assert(this->BaseCG != nullptr); +} + +void PrecomputedResolver::resolveFunctionPointer( + FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) { + auto Callees = BaseCG->getCalleesOfCallAt(CallSite); + PossibleTargets.insert(Callees.begin(), Callees.end()); +} + +std::string PrecomputedResolver::str() const { return "Precomputed"; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 5b916bcd1b..df4464458f 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -17,11 +17,14 @@ #include 
"phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" @@ -208,11 +211,20 @@ void Resolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, void Resolver::otherInst(const llvm::Instruction *Inst) {} -std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, - const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT) { +MaybeUniquePtr Resolver::DefaultBaseResolverProvider::operator()( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef /*PT*/) { + return std::make_unique(IRDB, VTP, TH); +} + +std::unique_ptr Resolver::create( + CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT, + llvm::function_ref( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT)> + GetBaseRes) { assert(IRDB != nullptr); assert(VTP != nullptr); @@ -225,9 +237,12 @@ std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, case CallGraphAnalysisType::RTA: assert(TH != nullptr); return std::make_unique(IRDB, VTP, TH); - case CallGraphAnalysisType::VTA: - llvm::report_fatal_error( - "The VTA 
callgraph algorithm is not implemented yet"); + case CallGraphAnalysisType::VTA: { + assert(PT); + auto BaseRes = GetBaseRes(IRDB, VTP, TH, PT); + assert(BaseRes != nullptr); + return std::make_unique(IRDB, VTP, PT, std::move(BaseRes)); + } case CallGraphAnalysisType::OTF: assert(PT); return std::make_unique(IRDB, VTP, PT); diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp index 198645fbc7..3f90a68e2b 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -1,26 +1,58 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/PrecomputedResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/SCCGeneric.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/InstrTypes.h" using namespace psr; +void VTAResolver::DefaultReachableFunctions::operator()( + const LLVMProjectIRDB &IRDB, + llvm::function_ref WithFun) { + llvm::for_each(IRDB.getAllFunctions(), WithFun); +} + +static VTAResolver createWithBaseCGResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + MaybeUniquePtr BaseCG, vta::AliasInfoTy AS) { + auto ReachableFunctions = + [BaseCG = BaseCG.get()]( + const LLVMProjectIRDB &, + llvm::function_ref WithFun) { + llvm::for_each(BaseCG->getAllVertexFunctions(), WithFun); + }; + auto BaseRes = + std::make_unique(IRDB, VTP, std::move(BaseCG)); + + return VTAResolver(IRDB, VTP, AS, std::move(BaseRes), ReachableFunctions); +} + VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, 
- MaybeUniquePtr BaseCG, - vta::AliasInfoTy AS) - : Resolver(IRDB, VTP), BaseCG(std::move(BaseCG)) { + const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, + MaybeUniquePtr BaseCG) + : psr::VTAResolver( + createWithBaseCGResolver(IRDB, VTP, std::move(BaseCG), AS)) {} + +VTAResolver::VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions) + : Resolver(IRDB, VTP), BaseCG(std::move(BaseRes)) { assert(this->BaseCG != nullptr); - auto TAG = vta::computeTypeAssignmentGraph(*IRDB->getModule(), *this->BaseCG, - AS, *VTP); + auto TAG = vta::computeTypeAssignmentGraph(*IRDB, *VTP, AS, *this->BaseCG, + ReachableFunctions); SCCs = computeSCCs(TAG); auto Deps = computeSCCDependencies(TAG, SCCs); @@ -55,9 +87,7 @@ void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, auto *VT = CallSite->getCalledOperand()->stripPointerCastsAndAliases(); auto VtableIndex = RetrievedVtableIndex.value(); - auto BaseCalleesVec = BaseCG->getCalleesOfCallAt(CallSite); - llvm::SmallDenseSet BaseCallees( - BaseCalleesVec.begin(), BaseCalleesVec.end()); + auto BaseCallees = BaseCG->resolveIndirectCall(CallSite); auto ReceiverIdx = CallSite->hasStructRetAttr(); if (CallSite->arg_size() > ReceiverIdx) { @@ -111,9 +141,7 @@ void VTAResolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, // llvm::errs() << "[resolveFunctionPointer] At " << llvmIRToString(CallSite) // << '\n'; - auto BaseCalleesVec = BaseCG->getCalleesOfCallAt(CallSite); - llvm::SmallDenseSet BaseCallees( - BaseCalleesVec.begin(), BaseCalleesVec.end()); + auto BaseCallees = BaseCG->resolveIndirectCall(CallSite); auto TNId = Nodes.getOrNull({vta::Variable{ CallSite->getCalledOperand()->stripPointerCastsAndAliases()}}); diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index 1d18b4175e..7edd675c04 100644 --- 
a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -10,6 +10,8 @@ #include "phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" @@ -184,15 +186,13 @@ static void addTAGNode(TAGNode TN, TypeAssignmentGraph &TAG) { TAG.Nodes.getOrInsert(TN); } -static void addFields(const llvm::Module &Mod, TypeAssignmentGraph &TAG, +static void addFields(const LLVMProjectIRDB &IRDB, TypeAssignmentGraph &TAG, const llvm::DataLayout &DL) { - auto &&Structs = Mod.getIdentifiedStructTypes(); - TAG.Nodes.reserve(TAG.Nodes.size() + Structs.size()); size_t PointerSize = DL.getPointerSize(); llvm::DebugInfoFinder DIF; - DIF.processModule(Mod); + DIF.processModule(*IRDB.getModule()); for (auto *DITy : DIF.types()) { if (auto *CompTy = llvm::dyn_cast(DITy)) { @@ -205,11 +205,11 @@ static void addFields(const llvm::Module &Mod, TypeAssignmentGraph &TAG, } } -static void addGlobals(const llvm::Module &Mod, TypeAssignmentGraph &TAG) { - auto NumGlobals = Mod.global_size(); +static void addGlobals(const LLVMProjectIRDB &IRDB, TypeAssignmentGraph &TAG) { + auto NumGlobals = IRDB.getNumGlobals(); TAG.Nodes.reserve(TAG.Nodes.size() + NumGlobals); - for (const auto &Glob : Mod.globals()) { + for (const auto &Glob : IRDB.getModule()->globals()) { if (Glob.getValueType()->isIntOrIntVectorTy() || Glob.getValueType()->isFloatingPointTy()) { continue; @@ -461,9 +461,7 @@ static void handleEntryForCall(const llvm::CallBase *Call, TAGNodeId CSNod, } static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, - const psr::CallGraph &BaseCG, - const psr::LLVMVFTableProvider &VTP) { + 
Resolver &BaseRes, const psr::LLVMVFTableProvider &VTP) { llvm::SmallVector> Args; llvm::SmallBitVector EntryArgs; @@ -488,9 +486,14 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, return; } - for (const auto *Callee : BaseCG.getCalleesOfCallAt(Call)) { + const auto HandleCallTarget = [&](const llvm::Function *Callee) { handleEntryForCall(Call, *CSNod, TAG, Callee, VTP); + if (Callee->isDeclaration()) { + // XXX: Integrate with getLibCSummary() + return; + } + for (const auto &[Param, Arg] : llvm::zip(Callee->args(), Args)) { auto ParamNodId = TAG.get({Variable{&Param}}); if (!ParamNodId) { @@ -520,6 +523,15 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, TAG.addEdge(*RetNod, *CSNod); } } + }; + + if (const auto *StaticCallee = llvm::dyn_cast( + Call->getCalledOperand()->stripPointerCastsAndAliases())) { + HandleCallTarget(StaticCallee); + } else { + for (const auto *Callee : BaseRes.resolveIndirectCall(Call)) { + HandleCallTarget(Callee); + } } } @@ -546,9 +558,8 @@ static void handleReturn(const llvm::ReturnInst *Ret, } static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, - const psr::CallGraph &BaseCG, - AliasInfoTy AI, const llvm::DataLayout &DL, + Resolver &BaseRes, AliasInfoTy AI, + const llvm::DataLayout &DL, const psr::LLVMVFTableProvider &VTP) { if (llvm::isa(&I)) { return; @@ -583,7 +594,7 @@ static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, } } if (const auto *Call = llvm::dyn_cast(&I)) { - handleCall(Call, TAG, BaseCG, VTP); + handleCall(Call, TAG, BaseRes, VTP); return; } if (const auto *Ret = llvm::dyn_cast(&I)) { @@ -593,51 +604,49 @@ static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, // TODO: Handle more cases } -static void buildTAGWithFun( - const llvm::Function *Fun, TypeAssignmentGraph &TAG, - const psr::CallGraph - &BaseCG, - AliasInfoTy AI, const llvm::DataLayout &DL, - const psr::LLVMVFTableProvider &VTP) { 
+static void buildTAGWithFun(const llvm::Function *Fun, TypeAssignmentGraph &TAG, + Resolver &BaseRes, AliasInfoTy AI, + const llvm::DataLayout &DL, + const psr::LLVMVFTableProvider &VTP) { for (const auto &I : llvm::instructions(Fun)) { - dispatch(I, TAG, BaseCG, AI, DL, VTP); + dispatch(I, TAG, BaseRes, AI, DL, VTP); } } -static auto computeTypeAssignmentGraphImpl( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - AliasInfoTy AI, const psr::LLVMVFTableProvider &VTP) +static auto computeTypeAssignmentGraphImpl(const LLVMProjectIRDB &IRDB, + Resolver &BaseRes, AliasInfoTy AI, + const psr::LLVMVFTableProvider &VTP, + ReachableFunsTy ReachableFunctions) -> TypeAssignmentGraph { TypeAssignmentGraph TAG; - const auto &DL = Mod.getDataLayout(); + const auto &DL = IRDB.getModule()->getDataLayout(); - addFields(Mod, TAG, DL); - addGlobals(Mod, TAG); + addFields(IRDB, TAG, DL); + addGlobals(IRDB, TAG); - for (const auto *Fun : BaseCG.getAllVertexFunctions()) { - initializeWithFun(Fun, TAG); - } + assert(ReachableFunctions); + + ReachableFunctions(IRDB, + [&TAG](const auto *Fun) { initializeWithFun(Fun, TAG); }); TAG.Adj.resize(TAG.Nodes.size()); - for (const auto *Fun : BaseCG.getAllVertexFunctions()) { - buildTAGWithFun(Fun, TAG, BaseCG, AI, DL, VTP); - } + ReachableFunctions(IRDB, [&](const auto *Fun) { + buildTAGWithFun(Fun, TAG, BaseRes, AI, DL, VTP); + }); return TAG; } -auto vta::computeTypeAssignmentGraph( - const llvm::Module &Mod, - const psr::CallGraph - &BaseCG, - AliasInfoTy AS, const psr::LLVMVFTableProvider &VTP) +auto vta::computeTypeAssignmentGraph(const LLVMProjectIRDB &IRDB, + const psr::LLVMVFTableProvider &VTP, + AliasInfoTy AS, Resolver &BaseRes, + ReachableFunsTy ReachableFunctions) -> TypeAssignmentGraph { - return computeTypeAssignmentGraphImpl(Mod, BaseCG, AS, VTP); + return computeTypeAssignmentGraphImpl(IRDB, BaseRes, AS, VTP, + ReachableFunctions); } void TypeAssignmentGraph::print(llvm::raw_ostream &OS) { diff --git 
a/tools/call-graph/call-graph.cpp b/tools/call-graph/call-graph.cpp index 98b3bb8f48..a3417f3fc9 100644 --- a/tools/call-graph/call-graph.cpp +++ b/tools/call-graph/call-graph.cpp @@ -63,6 +63,12 @@ static cl::opt }, cl::init(psr::CallGraphAnalysisType::OTF), cl::cat(CGCat)); +static cl::opt BuildBaseCG( + "build-base-cg", + cl::desc("Whether to build-up an explicit base-call-graph to " + "initialize the VTA algorithm. May take more time, but may reduce " + "the size of the type-assignment graph")); + static cl::opt AAType("aa-type", cl::desc("The alias-analysis type for those call-graph " @@ -94,6 +100,12 @@ int main(int Argc, char *Argv[]) { auto TH = psr::DIBasedTypeHierarchy(IRDB); auto EntryPoints = psr::getDefaultEntryPoints(IRDB); + if (BuildBaseCG && CGType != psr::CallGraphAnalysisType::VTA) { + llvm::WithColor::warning() << "The option --build-base-cg only works for " + "the cg-type 'vta'. It will be ignored for '" + << CGType << "'\n"; + } + auto CG = [&] { switch (CGType) { case psr::CallGraphAnalysisType::NORESOLVE: @@ -104,9 +116,15 @@ int main(int Argc, char *Argv[]) { } case psr::CallGraphAnalysisType::VTA: { auto BaseRes = psr::RTAResolver(&IRDB, &VTP, &TH); - auto BaseCG = psr::buildLLVMBasedCallGraph(IRDB, BaseRes, EntryPoints); auto AA = psr::LLVMAliasSet(&IRDB, true, AAType); - auto Res = psr::VTAResolver(&IRDB, &VTP, &BaseCG, &AA); + auto Res = [&] { + if (BuildBaseCG) { + auto BaseCG = std::make_unique( + psr::buildLLVMBasedCallGraph(IRDB, BaseRes, EntryPoints)); + return psr::VTAResolver(&IRDB, &VTP, &AA, std::move(BaseCG)); + } + return psr::VTAResolver(&IRDB, &VTP, &AA, &BaseRes); + }(); return psr::buildLLVMBasedCallGraph(IRDB, Res, EntryPoints); } case psr::CallGraphAnalysisType::OTF: { diff --git a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp index ffee4f2b8e..e0183950b8 100644 --- a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp +++ 
b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp @@ -68,7 +68,7 @@ psr::LLVMBasedCallGraph createBaseCG(psr::LLVMProjectIRDB &IRDB, psr::LLVMBasedCallGraph computeVTACallGraph( psr::LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, psr::LLVMAliasInfoRef AS, const psr::LLVMBasedCallGraph &BaseCG) { - psr::VTAResolver Res(&IRDB, &VTP, &BaseCG, AS); + psr::VTAResolver Res(&IRDB, &VTP, AS, &BaseCG); return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB)); } From 8502dbcc0cf245cb93e5e19e059957a803b18027 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 5 Sep 2025 14:56:23 +0200 Subject: [PATCH 16/27] Fix SCCGenericTest --- include/phasar/Utils/BitSet.h | 31 +++++++++++++------- include/phasar/Utils/SCCGeneric.h | 45 ++++++++++++++++++------------ unittests/Utils/SCCGenericTest.cpp | 21 ++++++++++---- 3 files changed, 63 insertions(+), 34 deletions(-) diff --git a/include/phasar/Utils/BitSet.h b/include/phasar/Utils/BitSet.h index a5ae207a84..c4cb90de28 100644 --- a/include/phasar/Utils/BitSet.h +++ b/include/phasar/Utils/BitSet.h @@ -18,7 +18,20 @@ #include namespace psr { -template class BitSet { + +namespace internal { +inline llvm::ArrayRef getWords(const llvm::BitVector &BV, + uintptr_t & /*Store*/) { + return BV.getData(); +} +inline llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, + uintptr_t &Store) { + return BV.getData(Store); +} +} // namespace internal + +template +class BitSet { public: class Iterator { public: @@ -28,7 +41,7 @@ template class BitSet { using difference_type = ptrdiff_t; using iterator_category = std::forward_iterator_tag; - Iterator(llvm::SmallBitVector::const_set_bits_iterator It) noexcept + Iterator(typename BitVectorTy::const_set_bits_iterator It) noexcept : It(It) {} Iterator &operator++() noexcept { @@ -50,7 +63,7 @@ template class BitSet { } private: - llvm::SmallBitVector::const_set_bits_iterator It; + typename BitVectorTy::const_set_bits_iterator It; }; using iterator = Iterator; @@ -126,8 
+139,8 @@ template class BitSet { uintptr_t LhsStore{}; uintptr_t RhsStore{}; - auto LhsWords = Lhs.Bits.getData(LhsStore); - auto RhsWords = Rhs.Bits.getData(RhsStore); + auto LhsWords = internal::getWords(Lhs.Bits, LhsStore); + auto RhsWords = internal::getWords(Rhs.Bits, RhsStore); if (LhsWords.size() == RhsWords.size()) { return LhsWords == RhsWords; } @@ -177,8 +190,8 @@ template class BitSet { uintptr_t Buf = 0; uintptr_t OfBuf = 0; - auto Words = Bits.getData(Buf); - auto OfWords = Of.Bits.getData(OfBuf); + auto Words = internal::getWords(Bits, Buf); + auto OfWords = internal::getWords(Of.Bits, OfBuf); if (Words.size() > OfWords.size()) { if (llvm::any_of(Words.drop_front(OfWords.size()), [](uintptr_t W) { return W != 0; })) { @@ -205,10 +218,8 @@ template class BitSet { [[nodiscard]] size_t size() const noexcept { return Bits.count(); } [[nodiscard]] bool empty() const noexcept { return Bits.none(); } - [[nodiscard]] bool test(uint32_t Ident) { return Bits.test(Ident); } - private: - llvm::SmallBitVector Bits; + BitVectorTy Bits; }; } // namespace psr diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index d148711060..c848a131d5 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -18,11 +18,13 @@ #include "phasar/Utils/TypedVector.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include +#include namespace psr { class LLVMBasedICFG; @@ -293,20 +295,20 @@ computeSCCIterative(const G &Graph) { // Number of nodes (vertices are assumed to be consecutive indices). size_t NumNodes = GTraits::size(Graph); - // Use TypedVector for per-vertex data instead of unordered_map. - TypedVector Dfn; // discovery index. - Dfn.resize(NumNodes, UNVISITED); - TypedVector Lowlink; // smallest index reachable. - Lowlink.resize(NumNodes, 0); - TypedVector InStack; // marker for Tarjan's stack. 
- InStack.resize(NumNodes, false); + // discovery index. + TypedVector Dfn(NumNodes, UNVISITED); + + // smallest index reachable. + TypedVector Lowlink(NumNodes, 0); + + // marker for Tarjan's stack. + BitSet InStack(NumNodes, false); int CurrentIndex = 0; // Our final SCC holder. Pre-resize SCCOfNode to the number of nodes. SCCHolder Holder; Holder.SCCOfNode.resize(NumNodes); - // Initially, holder.NodesInSCC is empty and holder.NumSCCs is zero. // Instead of storing a vector of out-edges, we store an iterator pair. using OutEdgeRange = @@ -319,18 +321,25 @@ computeSCCIterative(const G &Graph) { OutEdgeIterator It; OutEdgeIterator ItEnd; }; - std::vector DfsStack; - std::vector S; // Tarjan's stack (vertices in the current DFS path). + llvm::SmallVector DfsStack; + // Tarjan's stack (vertices in the current DFS path). + llvm::SmallVector S; // Helper to push a new DFS frame. - const auto PushFrame = [&](const VertexTy &V) { + const auto PushFrame = [&](VertexTy V) { auto &&Range = GTraits::outEdges(Graph, V); - DFSFrame Frame{ + static_assert( + std::is_lvalue_reference_v || + std::is_trivially_destructible_v>, + "We assume that outEdges gives either a reference or a view into the " + "out-edges, but never an owning container by value. Otherwise, the " + "DFSFrame iterators may be dangling"); + + DfsStack.emplace_back(DFSFrame{ V, std::begin(Range), std::end(Range), - }; - DfsStack.push_back(Frame); + }); }; // Iterate over all vertices (assumed to be dense). @@ -344,7 +353,7 @@ computeSCCIterative(const G &Graph) { Lowlink[V] = CurrentIndex; CurrentIndex++; S.push_back(V); - InStack[V] = true; + InStack.insert(V); // DFS simulation using the explicit stack. while (!DfsStack.empty()) { @@ -361,8 +370,8 @@ computeSCCIterative(const G &Graph) { Lowlink[W] = CurrentIndex; CurrentIndex++; S.push_back(W); - InStack[W] = true; - } else if (InStack[W]) { + InStack.insert(W); + } else if (InStack.contains(W)) { // w is in the current DFS path; update lowlink. 
Lowlink[U] = std::min(Lowlink[U], Dfn[W]); } @@ -375,7 +384,7 @@ computeSCCIterative(const G &Graph) { do { W = S.back(); S.pop_back(); - InStack[W] = false; + InStack.erase(W); // Assign w the current SCC id. Holder.SCCOfNode[W] = static_cast(Holder.size()); Comp.push_back(W); diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index e43e209a1a..906f918363 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -12,6 +12,7 @@ #include "phasar/Utils/AdjacencyList.h" #include "phasar/Utils/EmptyBaseOptimizationUtils.h" #include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/TypedVector.h" #include "gtest/gtest.h" @@ -23,7 +24,6 @@ using namespace psr; enum class NodeId : uint32_t {}; -using SCCId = SCCId; using ExampleGraph = AdjacencyList; @@ -34,13 +34,22 @@ static void computeSCCsAndCompare(ExampleGraph &Graph) { << "Iterative Approach did not reach all nodes\n"; ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) << "Recursive Approach did not reach all nodes\n"; - EXPECT_EQ(OutputRec.size(), OutputIt.size()) + ASSERT_EQ(OutputRec.size(), OutputIt.size()) << "Unequal number of SCC components\n"; - /*std::cout << std::to_string(OutputRec.NumSCCs) << " " - << std::to_string(OutputIt.NumSCCs) << "\n";*/ + + const auto None = SCCId(UINT32_MAX); + TypedVector, SCCId> Isomorphism(OutputRec.size(), None); + for (auto Vtx : GraphTraits::vertices(Graph)) { - EXPECT_EQ(OutputRec.SCCOfNode[Vtx], OutputIt.SCCOfNode[Vtx]) - << "SCCs differ at Index: " << uint32_t(Vtx) << "\n"; + auto RecSCC = OutputRec.SCCOfNode[Vtx]; + auto ItSCC = OutputIt.SCCOfNode[Vtx]; + + if (Isomorphism[RecSCC] == None) { + Isomorphism[RecSCC] = ItSCC; + } else { + EXPECT_EQ(Isomorphism[RecSCC], ItSCC) + << "SCCs differ at Index: " << uint32_t(Vtx) << "\n"; + } } } From 2c4e14d4eddaa0b15a8fa929a4320d587cc2303c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 5 Sep 2025 17:16:23 +0200 Subject: [PATCH 17/27] Refine 
the concepts for GraphTraits + add some comments --- .../ControlFlow/Resolver/VTAResolver.h | 37 +++++ .../ControlFlow/VTA/TypeAssignmentGraph.h | 5 + .../ControlFlow/VTA/TypePropagator.h | 3 + include/phasar/Utils/AdjacencyList.h | 83 ++++++----- include/phasar/Utils/BitSet.h | 30 +++- include/phasar/Utils/GraphTraits.h | 135 ++++++++++-------- include/phasar/Utils/SCCGeneric.h | 89 +++++++----- include/phasar/Utils/TypeTraits.h | 9 ++ .../ControlFlow/VTA/TypeAssignmentGraph.cpp | 22 +-- .../ControlFlow/VTACallGraphTest.cpp | 1 - unittests/Utils/SCCGenericTest.cpp | 18 +-- 11 files changed, 264 insertions(+), 168 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index a93d5f5ce0..d1d72849e3 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -22,6 +22,15 @@ #include "llvm/ADT/STLFunctionalExtras.h" namespace psr { +///\brief A Resolver that uses a variant of the Variable Type Analysis to +/// resolver indirect calls. +/// +/// Uses debug-information to achieve better results with C++ virtual calls. +/// Uses alias-information as fallback mechanism for when types don't help or +/// are not found, e.g., to resolve function-pointer calls. +/// +/// Requires a base-call-graph or at least a base-resolver to resolve indirect +/// calls while constructing the type-assignment graph. class VTAResolver : public Resolver { public: struct DefaultReachableFunctions { @@ -29,9 +38,21 @@ class VTAResolver : public Resolver { llvm::function_ref WithFun); }; + /// Constructs a VTAResolver with a given pre-computed call-graph and + /// call-back based alias-information (to-be-replaced by AliasIterator once + /// available) + /// + /// Builds the type-assignment graph and propagates allocated types though + /// it's SCCs. 
explicit VTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, MaybeUniquePtr BaseCG); + + /// Constructs a VTAResolver with a given pre-computed call-graph and + /// LLVMAliasInfoRef alias-information. + /// + /// Builds the type-assignment graph and propagates allocated types though + /// it's SCCs. explicit VTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, MaybeUniquePtr BaseCG) @@ -44,12 +65,28 @@ class VTAResolver : public Resolver { }, std::move(BaseCG)) {} + /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) + /// and call-back based alias-information (to-be-replaced by AliasIterator + /// once available). + /// Uses the optional parameter ReachableFunctions to consider only a subset + /// of all functions for building the type-assignment graph + /// + /// Builds the type-assignment graph and propagates allocated types though + /// it's SCCs. explicit VTAResolver( const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, llvm::function_ref)> ReachableFunctions = DefaultReachableFunctions{}); + + /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) + /// and LLVMAliasInfoRef alias-information. + /// Uses the optional parameter ReachableFunctions to consider only a subset + /// of all functions for building the type-assignment graph + /// + /// Builds the type-assignment graph and propagates allocated types though + /// it's SCCs. 
explicit VTAResolver( const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index fa32b8f3c3..2e6dc2a229 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -181,6 +181,11 @@ template <> struct GraphTraits { assert(G.Adj.inbounds(Vtx)); return G.Adj[Vtx]; } + [[nodiscard]] static size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { + assert(G.Adj.inbounds(Vtx)); + return G.Adj[Vtx].size(); + } [[nodiscard]] static const auto &nodes(const graph_type &G) noexcept { return G.Nodes; diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h index 34d7085f02..2e6d6b874d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h @@ -32,6 +32,8 @@ namespace psr::vta { struct TypeAssignmentGraph; enum class TAGNodeId : uint32_t; +/// \brief A concrete type-assignment that assigns a set of possible types to +/// each SCC of the TypeAssignmentGraph struct TypeAssignment { TypedVector, llvm::SmallDenseSet &SCCs); }; +/// Computes a TypeAssignment, based on a given TypeAssignmentGraph [[nodiscard]] TypeAssignment propagateTypes(const TypeAssignmentGraph &TAG, const SCCHolder &SCCs, const SCCDependencyGraph &Deps, diff --git a/include/phasar/Utils/AdjacencyList.h b/include/phasar/Utils/AdjacencyList.h index fbae8d348d..890aaf837d 100644 --- a/include/phasar/Utils/AdjacencyList.h +++ b/include/phasar/Utils/AdjacencyList.h @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include #include #include #include @@ -57,10 +58,10 @@ struct GraphTraits> { /// \returns The vertex-descriptor for the newly 
created node template >> - static vertex_t addNode(graph_type &G, TT &&Val) { + static constexpr vertex_t addNode(graph_type &G, TT &&Val) { assert(G.Adj.size() == G.Nodes.size()); - auto Ret = G.Nodes.size(); + auto Ret = vertex_t(G.Nodes.size()); G.Nodes.push_back(std::forward(Val)); G.Adj.emplace_back(); return Ret; @@ -71,15 +72,15 @@ struct GraphTraits> { /// \returns The vertex-descriptor for the newly created node template >> - static vertex_t addNode(graph_type &G, value_type /*Val*/ = {}) { - auto Ret = G.Adj.size(); + static constexpr vertex_t addNode(graph_type &G, value_type /*Val*/ = {}) { + auto Ret = vertex_t(G.Adj.size()); G.Adj.emplace_back(); return Ret; } /// Makes the node Vtx as root in the graph G. A node should not be registered /// as root multiple times - static void addRoot(graph_type &G, vertex_t Vtx) { + static constexpr void addRoot(graph_type &G, vertex_t Vtx) { assert(G.Adj.inbounds(Vtx)); if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); @@ -88,7 +89,8 @@ struct GraphTraits> { } /// Gets a range of all root nodes of graph G - static llvm::ArrayRef roots(const graph_type &G) noexcept { + static constexpr llvm::ArrayRef + roots(const graph_type &G) noexcept { if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } @@ -99,7 +101,7 @@ struct GraphTraits> { /// be nodes inside G. Multi-edges are supported, i.e. 
edges are not /// deduplicated automatically; to manually deduplicate the edges of one /// source-node, call dedupOutEdges() - static void addEdge(graph_type &G, vertex_t From, edge_t To) { + static constexpr void addEdge(graph_type &G, vertex_t From, edge_t To) { assert(G.Adj.inbounds(From)); if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); @@ -108,8 +110,8 @@ struct GraphTraits> { } /// Gets a range of all edges outgoing from node Vtx in graph G - static llvm::ArrayRef outEdges(const graph_type &G, - vertex_t Vtx) noexcept { + static constexpr llvm::ArrayRef outEdges(const graph_type &G, + vertex_t Vtx) noexcept { assert(G.Adj.inbounds(Vtx)); if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); @@ -118,7 +120,8 @@ struct GraphTraits> { } /// Gets the number of edges outgoing from node Vtx in graph G - static size_t outDegree(const graph_type &G, vertex_t Vtx) noexcept { + static constexpr size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { assert(G.Adj.inbounds(Vtx)); if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); @@ -127,41 +130,53 @@ struct GraphTraits> { } /// Deduplicates the edges outgoing from node Vtx in graph G. Deduplication is - /// based on operator< and operator== of the edge_t type - static void dedupOutEdges(graph_type &G, vertex_t Vtx) noexcept { + /// based on operator== of the edge_t type, and operator< if available. 
+ static constexpr void dedupOutEdges(graph_type &G, vertex_t Vtx) noexcept { assert(G.Adj.inbounds(Vtx)); if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } auto &OutEdges = G.Adj[Vtx]; - std::sort(OutEdges.begin(), OutEdges.end()); - OutEdges.erase(std::unique(OutEdges.begin(), OutEdges.end()), - OutEdges.end()); + + if constexpr (IsLessComparable) { + std::sort(OutEdges.begin(), OutEdges.end()); + OutEdges.erase(std::unique(OutEdges.begin(), OutEdges.end()), + OutEdges.end()); + } else { + auto End = OutEdges.end(); + for (auto It = OutEdges.begin(); It < End; ++It) { + End = std::remove(std::next(It), End, *It); + } + OutEdges.erase(End, OutEdges.end()); + } } /// Gets a const range of all nodes in graph G template >> - static llvm::ArrayRef nodes(const graph_type &G) noexcept { + static constexpr llvm::ArrayRef + nodes(const graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a mutable range of all nodes in graph G template >> - static llvm::MutableArrayRef nodes(graph_type &G) noexcept { + static constexpr llvm::MutableArrayRef + nodes(graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a range of all nodes in graph G template >> - static RepeatRangeType nodes(const graph_type &G) noexcept { + static constexpr RepeatRangeType + nodes(const graph_type &G) noexcept { return repeat(value_type{}, G.Adj.size()); } /// Gets a range of vertex-descriptors for all nodes in graph G - static auto vertices(const graph_type &G) noexcept { + static constexpr auto vertices(const graph_type &G) noexcept { if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } @@ -171,7 +186,8 @@ struct GraphTraits> { /// Gets the node-tag for node Vtx in graph G. 
Vtx must be part of G template >> - static const value_type &node(const graph_type &G, vertex_t Vtx) noexcept { + static constexpr const value_type &node(const graph_type &G, + vertex_t Vtx) noexcept { assert(G.Adj.inbounds(Vtx)); assert(G.Adj.size() == G.Nodes.size()); return G.Nodes[Vtx]; @@ -179,7 +195,7 @@ struct GraphTraits> { /// Gets the node-tag for node Vtx in graph G. Vtx must be part of G template >> - static value_type &node(graph_type &G, vertex_t Vtx) noexcept { + static constexpr value_type &node(graph_type &G, vertex_t Vtx) noexcept { assert(G.Adj.inbounds(Vtx)); assert(G.Adj.size() == G.Nodes.size()); return G.Nodes[Vtx]; @@ -188,14 +204,14 @@ struct GraphTraits> { /// Gets the node-tag for node Vtx in graph G. Vtx must be part of G template >> - static value_type node([[maybe_unused]] const graph_type &G, - [[maybe_unused]] vertex_t Vtx) noexcept { + static constexpr value_type node([[maybe_unused]] const graph_type &G, + [[maybe_unused]] vertex_t Vtx) noexcept { assert(G.Adj.inbounds(Vtx)); return {}; } /// Gets the number of nodes in graph G - static size_t size(const graph_type &G) noexcept { + static constexpr size_t size(const graph_type &G) noexcept { if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } @@ -203,7 +219,7 @@ struct GraphTraits> { } /// Gets the number of nodes in graph G that are marked as root - static size_t roots_size(const graph_type &G) noexcept { // NOLINT + static constexpr size_t roots_size(const graph_type &G) noexcept { // NOLINT if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } @@ -211,7 +227,7 @@ struct GraphTraits> { } /// Pre-allocates space to hold up to Capacity nodes - static void reserve(graph_type &G, size_t Capacity) { + static constexpr void reserve(graph_type &G, size_t Capacity) { if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); G.Nodes.reserve(Capacity); @@ -224,7 +240,7 @@ struct GraphTraits> { /// was another not-popped node 
inserted in between. /// /// \returns True, iff the removal was successful - static bool pop(graph_type &G, vertex_t Vtx) { + static constexpr bool pop(graph_type &G, vertex_t Vtx) { if (Vtx == G.Adj.size() - 1) { G.Adj.pop_back(); if constexpr (!std::is_empty_v) { @@ -237,7 +253,7 @@ struct GraphTraits> { /// Gets the vertex-descriptor of the target-node of the given Edge template - static std::enable_if_t, vertex_t> + static constexpr std::enable_if_t, vertex_t> target(edge_t Edge) noexcept { return Edge; } @@ -246,21 +262,21 @@ struct GraphTraits> { /// weight of the returned edge and the parameter edge is same, but the target /// nodes may differ. template - static std::enable_if_t, edge_t> + static constexpr std::enable_if_t, edge_t> withEdgeTarget(edge_t /*edge*/, vertex_t Tar) noexcept { return Tar; } /// Gets the weight associated with the given edge - static EmptyType weight(edge_t /*unused*/) noexcept { return {}; } + static constexpr EmptyType weight(edge_t /*unused*/) noexcept { return {}; } /// Removes the edge denoted by It outgoing from source-vertex Vtx from the /// graph G. This function is not required by the is_graph_trait concept. 
/// /// \returns An edge_iterator directly following It that should be used to /// continue iteration instead of std::next(It) - static edge_iterator removeEdge(graph_type &G, vertex_t Vtx, - edge_iterator It) noexcept { + static constexpr edge_iterator removeEdge(graph_type &G, vertex_t Vtx, + edge_iterator It) noexcept { assert(G.Adj.inbounds(Vtx)); if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); @@ -278,7 +294,8 @@ struct GraphTraits> { /// /// \returns A roots_iterator directly following It that should be used to /// continue iteration instead of std::next(It) - static roots_iterator removeRoot(graph_type &G, roots_iterator It) noexcept { + static constexpr roots_iterator removeRoot(graph_type &G, + roots_iterator It) noexcept { if constexpr (!std::is_empty_v) { assert(G.Adj.size() == G.Nodes.size()); } diff --git a/include/phasar/Utils/BitSet.h b/include/phasar/Utils/BitSet.h index c4cb90de28..6d418994ca 100644 --- a/include/phasar/Utils/BitSet.h +++ b/include/phasar/Utils/BitSet.h @@ -30,9 +30,20 @@ inline llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, } } // namespace internal -template -class BitSet { +/// \brief A set-type that can compactly store sets of sequential integer-like +/// types. +/// +/// Use this type for sequential (unsigned) integers and ids that can convert +/// from and to uint32_t. +/// +/// \tparam IdT The type of elements to store in this set. Must be losslessly +/// convertible from and to uint32_t. +/// \tparam BitVectorTy The underlying bit-vector to use. Must be either +/// llvm::BitVector or llvm::SmallBitVector. +template class BitSet { public: + /// Wraps BitVectorTy::const_set_bits_iterator, as LLVM's bitset iterators + /// unfortunately do not conform to the named requirement of an iterator class Iterator { public: using value_type = IdT; @@ -94,6 +105,7 @@ class BitSet { Bits.set(Index); } + /// Same as insert(), but returns, whether the set was changed. 
[[nodiscard]] bool tryInsert(IdT Id) { auto Index = uint32_t(Id); if (Bits.size() <= Index) { @@ -110,6 +122,7 @@ class BitSet { Bits.reset(uint32_t(Id)); } } + /// Same as erase(), but returns, whether the set was changed. [[nodiscard]] bool tryErase(IdT Id) noexcept { if (contains(Id)) { return Bits.reset(uint32_t(Id)), true; @@ -120,9 +133,10 @@ class BitSet { void mergeWith(const BitSet &Other) { Bits |= Other.Bits; } + /// Same as mergeWith(), but returns, whether the set was changed. bool tryMergeWith(const BitSet &Other) { /// TODO: Make this more efficient - return *this == Other ? false : (mergeWith(Other), true); + return isSupersetOf(Other) ? false : (mergeWith(Other), true); } void clear() noexcept { Bits.reset(); } @@ -166,6 +180,7 @@ class BitSet { } [[nodiscard]] iterator end() const noexcept { return Bits.set_bits_end(); } + /// Same as mergeWith() void operator|=(const BitSet &Other) { Bits |= Other.Bits; } void operator-=(const BitSet &Other) { Bits.reset(Other.Bits); } @@ -177,10 +192,12 @@ class BitSet { return Ret; } + /// Same as mergeWith(), but returns *this to allow a fluent interface. BitSet &insertAllOf(const BitSet &Other) { Bits |= Other.Bits; return *this; } + /// Same as operator-=, but returns *this to allow a fluent interface. BitSet &eraseAllOf(const BitSet &Other) { Bits.reset(Other.Bits); return *this; @@ -212,10 +229,13 @@ class BitSet { return Of.isSubsetOf(*this); } - // The number of bits available + /// The number of bits available. This operation is O(1) [[nodiscard]] size_t capacity() const noexcept { return Bits.size(); } - // The number of bits set to 1 + /// The number of bits set to 1. In contrast to most other containers, this + /// operation is linear in O(capacity()) [[nodiscard]] size_t size() const noexcept { return Bits.count(); } + /// Whether this set contains no elements. 
In contrast to most other + /// containers, this operation is linear in O(capacity()) [[nodiscard]] bool empty() const noexcept { return Bits.none(); } private: diff --git a/include/phasar/Utils/GraphTraits.h b/include/phasar/Utils/GraphTraits.h index bdfd82f212..d4d795e063 100644 --- a/include/phasar/Utils/GraphTraits.h +++ b/include/phasar/Utils/GraphTraits.h @@ -10,9 +10,9 @@ #ifndef PHASAR_UTILS_GRAPHTRAITS_H #define PHASAR_UTILS_GRAPHTRAITS_H +#include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/Utilities.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -35,18 +35,17 @@ template struct GraphTraits; #if __cplusplus >= 202002L template -concept is_graph_edge = requires(const Edge e1, Edge e2) { - { e1 == e2 } -> std::convertible_to; - { e1 != e2 } -> std::convertible_to; - { e1 < e2 } -> std::convertible_to; +concept is_graph_edge = requires(const Edge E1, Edge E2) { + { E1 == E2 } -> std::convertible_to; + { E1 != E2 } -> std::convertible_to; }; template concept is_const_graph_trait = - requires(const typename GraphTrait::graph_type &cgraph, - typename GraphTrait::value_type val, - typename GraphTrait::vertex_t vtx, - typename GraphTrait::edge_t edge) { + requires(const typename GraphTrait::graph_type &CGraph, + typename GraphTrait::value_type Val, + typename GraphTrait::vertex_t Vtx, + typename GraphTrait::edge_t Edge) { typename GraphTrait::graph_type; typename GraphTrait::value_type; typename GraphTrait::vertex_t; @@ -58,91 +57,101 @@ concept is_const_graph_trait = } -> std::convertible_to; { - GraphTrait::outEdges(cgraph, vtx) + GraphTrait::outEdges(CGraph, Vtx) } -> psr::is_iterable_over_v; - { GraphTrait::outDegree(cgraph, vtx) } -> std::convertible_to; + { GraphTrait::outDegree(CGraph, Vtx) } -> std::convertible_to; { - GraphTrait::nodes(cgraph) + GraphTrait::nodes(CGraph) } -> psr::is_iterable_over_v; { - GraphTrait::roots(cgraph) + GraphTrait::roots(CGraph) } -> psr::is_iterable_over_v; { - 
GraphTrait::vertices(cgraph) + GraphTrait::vertices(CGraph) } -> psr::is_iterable_over_v; { - GraphTrait::node(cgraph, vtx) + GraphTrait::node(CGraph, Vtx) } -> std::convertible_to; - { GraphTrait::size(cgraph) } -> std::convertible_to; - { GraphTrait::roots_size(cgraph) } -> std::convertible_to; + { GraphTrait::size(CGraph) } -> std::convertible_to; + { GraphTrait::roots_size(CGraph) } -> std::convertible_to; { - GraphTrait::target(edge) + GraphTrait::target(Edge) } -> std::convertible_to; { - GraphTrait::withEdgeTarget(edge, vtx) + GraphTrait::withEdgeTarget(Edge, Vtx) } -> std::convertible_to; }; template concept is_graph_trait = is_const_graph_trait && - requires(typename GraphTrait::graph_type &graph, - typename GraphTrait::value_type val, - typename GraphTrait::vertex_t vtx, - typename GraphTrait::edge_t edge) { - typename GraphTrait::graph_type; - typename GraphTrait::value_type; - typename GraphTrait::vertex_t; - typename GraphTrait::edge_t; - requires is_graph_edge; + requires(typename GraphTrait::graph_type &Graph, + typename GraphTrait::value_type Val, + typename GraphTrait::vertex_t Vtx, + typename GraphTrait::edge_t Edge) { { - GraphTrait::Invalid - } -> std::convertible_to; - { - GraphTrait::addNode(graph, val) - } -> std::convertible_to; - { GraphTrait::addEdge(graph, vtx, edge) }; - { GraphTrait::dedupOutEdges(graph, vtx) }; - { GraphTrait::addRoot(graph, vtx) }; - { GraphTrait::pop(graph, vtx) } -> std::same_as; - { - GraphTrait::target(edge) + GraphTrait::addNode(Graph, Val) } -> std::convertible_to; + { GraphTrait::addEdge(Graph, Vtx, Edge) }; + { GraphTrait::dedupOutEdges(Graph, Vtx) }; + { GraphTrait::addRoot(Graph, Vtx) }; + { GraphTrait::pop(Graph, Vtx) } -> std::same_as; + }; + +template +concept is_weighted_const_graph_trait = + is_const_graph_trait && + requires(const typename GraphTrait::edge_t &Edge) { + typename GraphTrait::weight_t; { - GraphTrait::withEdgeTarget(edge, vtx) - } -> std::convertible_to; - { GraphTrait::weight(edge) }; + 
GraphTrait::weight(Edge) + } -> std::convertible_to; }; +template +concept is_weighted_graph_trait = + is_graph_trait && is_weighted_const_graph_trait; template -concept is_const_graph = requires(Graph g) { +concept is_const_graph = requires(Graph G) { typename GraphTraits>; requires is_const_graph_trait>>; }; template -concept is_graph = requires(Graph g) { +concept is_graph = requires(Graph G) { typename GraphTraits>; requires is_graph_trait>>; }; +template +concept is_weighted_const_graph = requires(Graph G) { + typename GraphTraits>; + requires is_weighted_const_graph_trait>>; +}; + +template +concept is_weighted_graph = requires(Graph G) { + typename GraphTraits>; + requires is_weighted_graph_trait>>; +}; + template concept is_reservable_graph_trait_v = - is_graph_trait && requires(typename GraphTrait::graph_type &g) { - { GraphTrait::reserve(g, size_t(0)) }; + is_graph_trait && requires(typename GraphTrait::graph_type &G) { + { GraphTrait::reserve(G, size_t(0)) }; }; template concept is_removable_graph_trait_v = is_graph_trait && - requires(typename GraphTrait::graph_type &g, - typename GraphTrait::vertex_t vtx, - typename GraphTrait::edge_iterator edge_it, - typename GraphTrait::roots_iterator root_it) { + requires(typename GraphTrait::graph_type &G, + typename GraphTrait::vertex_t Vtx, + typename GraphTrait::edge_iterator EdgeIt, + typename GraphTrait::roots_iterator RootIt) { typename GraphTrait::edge_iterator; typename GraphTrait::roots_iterator; - { GraphTrait::removeEdge(g, vtx, edge_it) }; - { GraphTrait::removeRoot(g, root_it) }; + { GraphTrait::removeEdge(G, Vtx, EdgeIt) }; + { GraphTrait::removeRoot(G, RootIt) }; }; #else @@ -227,7 +236,7 @@ template void printGraph(const GraphTy &G, llvm::raw_ostream &OS, llvm::StringRef Name = "", NodeTransform NodeToString = {}) #if __cplusplus >= 202002L - requires is_graph + requires is_const_graph #endif { using traits_t = GraphTraits; @@ -237,17 +246,23 @@ void printGraph(const GraphTy &G, llvm::raw_ostream 
&OS, auto Sz = traits_t::size(G); - for (size_t I = 0; I < Sz; ++I) { - OS << I; - if constexpr (!std::is_same_v) { + for (auto Vtx : traits_t::vertices(G)) { + OS << size_t(Vtx); + if constexpr (!std::is_empty_v) { OS << "[label=\""; - OS.write_escaped(std::invoke(NodeToString, traits_t::node(G, I))); + OS.write_escaped(std::invoke(NodeToString, traits_t::node(G, Vtx))); OS << "\"]"; } OS << ";\n"; - for (const auto &Edge : traits_t::outEdges(G, I)) { - OS << I << "->" << Edge << ";\n"; + for (const auto &Edge : traits_t::outEdges(G, Vtx)) { + OS << size_t(Vtx) << "->"; + if constexpr (is_llvm_printable_v) { + // to print the edge-weight as well, if possible + OS << Edge; + } else { + OS << size_t(traits_t::target(Edge)); + } + OS << ";\n"; } } } diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index c848a131d5..cffb050946 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -18,7 +18,6 @@ #include "phasar/Utils/TypedVector.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" @@ -26,13 +25,10 @@ #include #include -namespace psr { -class LLVMBasedICFG; -} // namespace psr - namespace psr { namespace detail { +// Unfortunately, `enum class` cannot be templated... struct SCCIdBase { uint32_t Value{}; @@ -58,6 +54,10 @@ struct SCCIdBase { }; } // namespace detail +/// \brief The Id of a strongly-connected component in a graph. +/// +/// \tparam GraphNodeId The vertex-type of the graph where this SCC was computed +/// for. template struct SCCId : detail::SCCIdBase { using detail::SCCIdBase::SCCIdBase; }; @@ -84,24 +84,27 @@ template struct DenseMapInfo> { namespace psr { -// holds the scc's of a given graph +/// \brief Holds the SCCs of a given graph. Each SCC is assigned a unique +/// sequential id. 
template struct SCCHolder { TypedVector, 0> SCCOfNode; TypedVector, llvm::SmallVector> NodesInSCC{}; + /// Number of SCCs [[nodiscard]] size_t size() const noexcept { return NodesInSCC.size(); } [[nodiscard]] bool empty() const noexcept { return NodesInSCC.empty(); } }; -// holds a graph were the scc's are collapsed to a single node. Resulting graph -// is a DAG +/// \brief Holds a graph where the SCCs are collapsed to a single node. +/// Conforms to the is_const_graph concept. template struct SCCDependencyGraph { TypedVector, llvm::SmallDenseSet>> ChildrenOfSCC; llvm::SmallVector, 0> SCCRoots; }; +/// \brief Implements the is_const_graph concept for SCCDependencyGraph template struct GraphTraits> { using graph_type = SCCDependencyGraph; @@ -111,56 +114,58 @@ struct GraphTraits> { static inline constexpr auto Invalid = vertex_t(UINT32_MAX); - [[nodiscard]] static const auto &outEdges(const graph_type &G, - vertex_t Vtx) noexcept { + [[nodiscard]] static constexpr const auto &outEdges(const graph_type &G, + vertex_t Vtx) noexcept { assert(G.ChildrenOfSCC.inbounds(Vtx)); return G.ChildrenOfSCC[Vtx]; } - [[nodiscard]] static size_t outDegree(const graph_type &G, - vertex_t Vtx) noexcept { + [[nodiscard]] static constexpr size_t outDegree(const graph_type &G, + vertex_t Vtx) noexcept { assert(G.ChildrenOfSCC.inbounds(Vtx)); return G.ChildrenOfSCC[Vtx].size(); } - [[nodiscard]] static RepeatRangeType - nodes(const graph_type &G) noexcept { + [[nodiscard]] static constexpr auto nodes(const graph_type &G) noexcept { return repeat(EmptyType{}, G.ChildrenOfSCC.size()); } - [[nodiscard]] static llvm::ArrayRef + [[nodiscard]] static constexpr llvm::ArrayRef roots(const graph_type &G) noexcept { return G.SCCRoots; } - [[nodiscard]] static auto vertices(const graph_type &G) noexcept { - return iota(G.Adj.size()); + [[nodiscard]] static constexpr auto vertices(const graph_type &G) noexcept { + return iota(G.ChildrenOfSCC.size()); } - [[nodiscard]] static value_type 
node([[maybe_unused]] const graph_type &G, - [[maybe_unused]] vertex_t Vtx) noexcept { + [[nodiscard]] static constexpr value_type + node([[maybe_unused]] const graph_type &G, + [[maybe_unused]] vertex_t Vtx) noexcept { assert(G.ChildrenOfSCC.inbounds(Vtx)); return {}; } - [[nodiscard]] static size_t size(const graph_type &G) noexcept { + [[nodiscard]] static constexpr size_t size(const graph_type &G) noexcept { return G.ChildrenOfSCC.size(); } - [[nodiscard]] static size_t + [[nodiscard]] static constexpr size_t roots_size(const graph_type &G) noexcept { // NOLINT return G.SCCRoots.size(); } - [[nodiscard]] constexpr vertex_t target(edge_t Edge) noexcept { return Edge; } + [[nodiscard]] static constexpr vertex_t target(edge_t Edge) noexcept { + return Edge; + } - [[nodiscard]] vertex_t withEdgeTarget(edge_t /*edge*/, - vertex_t Tar) noexcept { + [[nodiscard]] static constexpr vertex_t + withEdgeTarget(edge_t /*edge*/, vertex_t Tar) noexcept { return Tar; } }; -// holds topologically sorted SCCDependencyGraph +/// \brief Holds topologically sorted SCCDependencyGraph nodes template struct SCCOrder { llvm::SmallVector, 0> SCCIds; }; @@ -180,21 +185,13 @@ template struct SCCData { Seen(NumFuns) {} }; -template struct SCCDataIt { - TypedVector Disc; - TypedVector Low; - BitSet OnStack; - llvm::SmallVector Stack; +template struct SCCDataIt : SCCData { llvm::SmallVector> CallStack; - uint32_t Time = 0; - BitSet Seen; - explicit SCCDataIt(size_t NumFuns) - : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), - Seen(NumFuns) {} + using SCCData::SCCData; }; -constexpr void setMin(uint32_t &InOut, uint32_t Other) { +constexpr void setMin(uint32_t &InOut, uint32_t Other) noexcept { if (Other < InOut) { InOut = Other; } @@ -258,6 +255,10 @@ computeSCCsRec(const G &Graph, typename GraphTraits::vertex_t CurrNode, } // namespace detail +/// \brief Computes the strongly-connected components (SCCs) of a given graph. 
+/// The graph should conform to the is_const_graph concept. +/// +/// Uses Tarjan's algorithm (recursive) to compute the SCCs. template [[nodiscard]] SCCHolder::vertex_t> computeSCCs(const G &Graph) { @@ -282,7 +283,11 @@ computeSCCs(const G &Graph) { return Ret; } -// Note: generated by FhGenie GPT o3 Mini +/// \brief Computes the strongly-connected components (SCCs) of a given graph. +/// The graph should conform to the is_const_graph concept. +/// +/// Uses a non-recursive variant of Tarjan's algorithm to compute the SCCs. +/// \attention Largely generated by FhGenie GPT o3 Mini, so use with caution! template SCCHolder>::vertex_t> computeSCCIterative(const G &Graph) { @@ -302,7 +307,7 @@ computeSCCIterative(const G &Graph) { TypedVector Lowlink(NumNodes, 0); // marker for Tarjan's stack. - BitSet InStack(NumNodes, false); + BitSet InStack(NumNodes, false); int CurrentIndex = 0; @@ -403,6 +408,9 @@ computeSCCIterative(const G &Graph) { return Holder; } +/// \brief Creates a graph based on the given input Graph, collapsing all SCCs +/// to single nodes. The resulting graph is always a DAG, i.e., it contains no +/// cycles template SCCDependencyGraph::vertex_t> computeSCCDependencies( const G &Graph, const SCCHolder::vertex_t> &SCCs) { @@ -436,6 +444,11 @@ SCCDependencyGraph::vertex_t> computeSCCDependencies( return Ret; } +/// \brief Computes a topological order of the nodes in the given +/// dependency-graph. +/// +/// Uses a simple, recursive postorder-DFS search to find a topological +/// ordering. 
template [[nodiscard]] SCCOrder computeSCCOrder(const SCCHolder &SCCs, diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index d823fc52a2..b430161b97 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -165,6 +165,12 @@ struct AreEqualityComparable() == std::declval())> : std::true_type {}; +template +struct IsLessComparable : std::false_type {}; +template +struct IsLessComparable() < std::declval())> + : std::true_type {}; + template struct HasDepth : std::false_type {}; template struct HasDepth().depth())> @@ -285,6 +291,9 @@ PSR_CONCEPT IsEqualityComparable = detail::IsEqualityComparable::value; template PSR_CONCEPT AreEqualityComparable = detail::AreEqualityComparable::value; +template +PSR_CONCEPT IsLessComparable = detail::IsLessComparable::value; + template PSR_CONCEPT has_isInteresting_v = // NOLINT detail::has_isInteresting::value; diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index 7edd675c04..933297ef65 100644 --- a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -12,35 +12,17 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/Utilities.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" -#include 
"llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/raw_ostream.h" #include #include @@ -50,6 +32,10 @@ using namespace psr; using namespace psr::vta; +#if __cplusplus >= 202002L +static_assert(is_const_graph); +#endif + static void printNodeImpl(llvm::raw_ostream &OS, Variable Var) { OS << "var-"; OS.write_escaped(psr::llvmIRToString(Var.Val)); diff --git a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp index e0183950b8..c88e1a643c 100644 --- a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp @@ -7,7 +7,6 @@ * Fabian Schiebel and others *****************************************************************************/ -// #include "phasar/AnalysisConfig.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 906f918363..e731fdde1d 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -51,6 +51,11 @@ static void computeSCCsAndCompare(ExampleGraph &Graph) { << "SCCs differ at Index: " << uint32_t(Vtx) << "\n"; } } + +#if __cplusplus >= 202002L + auto SCCDeps = computeSCCDependencies(Graph, OutputRec); + static_assert(is_const_graph); +#endif } TEST(SCCGenericTest, SCCTest) { @@ -113,19 +118,6 @@ TEST(SCCGenericTest, SCCTest) { for (auto &TestGraph : TestGraphs) { computeSCCsAndCompare(TestGraph); } - - /*auto OutputRec = 
analysis::call_graph::execTarjan(Graph, false); - auto OutputIt = analysis::call_graph::execTarjan(Graph, true); - ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) - << "Iterative Approach did not reach all nodes\n"; - ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) - << "Recursive Approach did not reach all nodes\n"; - EXPECT_EQ(OutputRec.NumSCCs, OutputIt.NumSCCs) - << "Unequal number of SCC components\n"; - for (size_t ID = 0; ID < Graph.Adj.size(); ID++) { - EXPECT_EQ(OutputRec.SCCOfNode[ID], OutputIt.SCCOfNode[ID]) - << "SCCs differ at Index: " << std::to_string(ID) << "\n"; - }*/ } // main function for the test case From 47d4bbd3282170f9dace8f3617a6a4fed3f5028e Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 5 Sep 2025 17:45:48 +0200 Subject: [PATCH 18/27] Some cleanup --- include/phasar/AnalysisConfig.h | 29 -- include/phasar/PhasarLLVM/ControlFlow.h | 1 + .../PhasarLLVM/ControlFlow/LLVMBasedCFG.h | 2 + .../ControlFlow/LLVMVFTableProvider.h | 2 - .../PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h | 344 ------------------ include/phasar/Utils.h | 2 + include/phasar/Utils/AdjacencyList.h | 2 +- include/phasar/Utils/BitSet.h | 28 +- include/phasar/Utils/IO.h | 2 +- include/phasar/Utils/TypeTraits.h | 7 +- lib/PhasarLLVM/ControlFlow/ControlFlow.cppm | 2 + .../ControlFlow/GlobalCtorsDtorsModel.cpp | 1 + .../ControlFlow/Resolver/Resolver.cpp | 2 - lib/Utils/IO.cpp | 1 - lib/Utils/Utils.cppm | 16 +- .../PhasarLLVM/ControlFlow/CMakeLists.txt | 26 +- unittests/Utils/SCCGenericTest.cpp | 4 +- 17 files changed, 56 insertions(+), 415 deletions(-) delete mode 100644 include/phasar/AnalysisConfig.h delete mode 100644 include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h diff --git a/include/phasar/AnalysisConfig.h b/include/phasar/AnalysisConfig.h deleted file mode 100644 index 30aa18fab9..0000000000 --- a/include/phasar/AnalysisConfig.h +++ /dev/null @@ -1,29 +0,0 @@ -/****************************************************************************** - * 
Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and others - *****************************************************************************/ - -#ifndef PHASAR_ANALYSISCONFIG_H -#define PHASAR_ANALYSISCONFIG_H - -#include "nlohmann/json.hpp" - -#include -#include - -namespace psr { -struct AnalysisConfig { - std::string OutputFile; - bool TreatWarningsAsError = false; - - std::optional PrecomputedCG; - std::optional PrecomputedAA; - /// TODO: More config options -}; -} // namespace psr - -#endif diff --git a/include/phasar/PhasarLLVM/ControlFlow.h b/include/phasar/PhasarLLVM/ControlFlow.h index 5ab99e536f..7019a511d7 100644 --- a/include/phasar/PhasarLLVM/ControlFlow.h +++ b/include/phasar/PhasarLLVM/ControlFlow.h @@ -20,5 +20,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #endif // PHASAR_PHASARLLVM_CONTROLFLOW_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index 6a3f97c56e..6bc968f4fc 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -18,6 +18,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "nlohmann/json.hpp" + namespace llvm { class Function; } // namespace llvm diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h index 3d050a1ba1..2352b049bc 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h @@ -20,8 +20,6 @@ #include #include -#include - namespace llvm 
{ class Module; class DIType; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h deleted file mode 100644 index 5a29b01d8f..0000000000 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/SCCGeneric.h +++ /dev/null @@ -1,344 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. - * All rights reserved. This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and other - *****************************************************************************/ - -// header guards hinzufügen - -#include "phasar/PhasarLLVM/ControlFlow/TypeAssignmentGraph.h" - -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/HashBuilder.h" -#include "llvm/Support/raw_ostream.h" - -#include - -namespace psr { -class LLVMBasedICFG; -} // namespace psr - -namespace psr::analysis::call_graph { -// struct TypeAssignmentGraph; -// enum class GraphNodeId : uint32_t; - -enum class [[clang::enum_extensibility(open)]] SCCId : uint32_t{}; - -// holds the scc's of a given graph -template struct SCCHolder { - llvm::SmallVector SCCOfNode{}; - llvm::SmallVector> NodesInSCC{}; - size_t NumSCCs = 0; -}; - -// holds a graph were the scc's are compressed to a single node. 
Resulting graph -// is a DAG -template struct SCCCallers { - llvm::SmallVector, 0> ChildrenOfSCC{}; - llvm::SmallVector SCCRoots{}; - - void print(llvm::raw_ostream &OS, - const SCCHolder &SCCs, const G &Graph); -}; - -// holds topologically sorted scccallers -struct SCCOrder { - llvm::SmallVector SCCIds; -}; - -template struct SCCData { - llvm::SmallVector Disc; - llvm::SmallVector Low; - llvm::SmallBitVector OnStack; - llvm::SmallVector Stack; - uint32_t Time = 0; - llvm::SmallBitVector Seen; - - explicit SCCData(size_t NumFuns) - : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), - Seen(NumFuns) {} -}; - -template struct SCCDataIt { - llvm::SmallVector Disc; - llvm::SmallVector Low; - llvm::SmallBitVector OnStack; - llvm::SmallVector Stack; - llvm::SmallVector> CallStack; - uint32_t Time = 0; - llvm::SmallBitVector Seen; - - explicit SCCDataIt(size_t NumFuns) - : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), - Seen(NumFuns) {} -}; - -static void setMin(uint32_t &InOut, uint32_t Other) { - if (Other < InOut) { - InOut = Other; - } -} - -// TODO: Non-recursive version -template -static void computeSCCsRec(const G &Graph, typename G::GraphNodeId CurrNode, - SCCData &Data, - SCCHolder &Holder) { - // See - // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components - - auto CurrTime = Data.Time++; - Data.Disc[size_t(CurrNode)] = CurrTime; - Data.Low[size_t(CurrNode)] = CurrTime; - Data.Stack.push_back(CurrNode); - Data.OnStack.set(uint32_t(CurrNode)); - - for (auto SuccNode : Graph.Adj[size_t(CurrNode)]) { - if (Data.Disc[size_t(SuccNode)] == UINT32_MAX) { - // Tree-edge: Not seen yet --> recurse - - computeSCCsRec(Graph, SuccNode, Data, Holder); - setMin(Data.Low[size_t(CurrNode)], Data.Low[size_t(SuccNode)]); - } else if (Data.OnStack.test(uint32_t(SuccNode))) { - // Back-edge --> circle! 
- - setMin(Data.Low[size_t(CurrNode)], Data.Disc[size_t(SuccNode)]); - } - } - - if (Data.Low[size_t(CurrNode)] == Data.Disc[size_t(CurrNode)]) { - // Found SCC - - auto SCCIdx = SCCId(Holder.NumSCCs++); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != CurrNode) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } - - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } -} - -// Iterative IMplementation for Tarjan's SCC Alg. -// -> Heapoverflow through simulated Stack? -template -static void tarjanIt(const G &Graph, SCCDataIt &Data, - SCCHolder &Holder) { - - auto CurrTime = Data.Time; - for (uint32_t Vertex = 0; Vertex < Graph.Adj.size(); Vertex++) { - if (Data.Disc[size_t(Vertex)] == UINT32_MAX) { - Data.CallStack.push_back({G::GraphNodeId(Vertex), 0}); - while (!Data.CallStack.empty()) { - auto Curr = Data.CallStack.pop_back_val(); - // Curr.second = 0 implies that Curr.fist was not visited before - if (Curr.second == 0) { - Data.Disc[size_t(Curr.first)] = CurrTime; - Data.Low[size_t(Curr.first)] = CurrTime; - CurrTime++; - Data.Stack.push_back(Curr.first); - Data.OnStack.set(uint32_t(Curr.first)); - } - // Curr.second > 0 implies that we came back from a recursive call - if (Curr.second > 0) { - setMin(Data.Low[size_t(Curr.first)], - Data.Low[size_t(Curr.second) - 1]); - } - // find the next recursive function call - while (Curr.second < Graph.getEdges(Curr.first).size() && - Data.Disc[size_t(Graph.getEdges(Curr.first)[Curr.second])]) { - typename G::GraphNodeId W = Graph.getEdges(Curr.first)[Curr.second]; - if (Data.OnStack.test(uint32_t(W))) { - setMin(Data.Low[size_t(Curr.first)], Data.Disc[size_t(W)]); - } - Curr.second++; - // If 
a Node u is undiscovered i.e. Data.Disc[size_t(u)] = UINT32_MAX - // start a recursive function call - if (Curr.second < Graph.getEdges(Curr.first).size()) { - typename G::GraphNodeId U = Graph.getEdges(Curr.first)[Curr.second]; - Data.CallStack.push_back({Curr.first, Curr.second++}); - Data.CallStack.push_back({U, 0}); - } - // If Curr.first is the root of a connected component i.e. Data.Disc = - // Data.Low - if (Data.Low[size_t(Curr.first)] == Data.Disc[size_t(Curr.first)]) { - //-> SCC found - auto SCCIdx = SCCId(Holder.NumSCCs++); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != Curr.first) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } - - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[size_t(Fun)] = SCCIdx; - Data.OnStack.reset(uint32_t(Fun)); - Data.Seen.set(uint32_t(Fun)); - NodesInSCC.push_back(Fun); - } - } - } - } - } -} - -template -[[nodiscard]] SCCHolder computeSCCs(const G &Graph) { - SCCHolder Ret{}; - - auto NumNodes = Graph.Adj.size(); - Ret.SCCOfNode.resize(NumNodes); - - if (!NumNodes) { - return Ret; - } - - SCCData Data(NumNodes); - for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (!Data.Seen.test(FunId)) { - computeSCCsRec(Graph, G::GraphNodeId(FunId), Data, Ret); - } - } - - return Ret; -} - -// choose which Tarjan implementation will be executed -template -[[nodiscard]] SCCHolder -execTarjan(const G &Graph, const bool Iterative) { - SCCHolder Ret{}; - - auto NumNodes = Graph.Adj.size(); - Ret.SCCOfNode.resize(NumNodes); - - if (!NumNodes) { - return Ret; - } - - SCCData Data(NumNodes); - SCCDataIt DataIt(NumNodes); - for (uint32_t FunId = 0; FunId != NumNodes; ++FunId) { - if (!Data.Seen.test(FunId)) { - if (Iterative) { - tarjanIt(Graph, DataIt, Ret); - } else { - computeSCCsRec(Graph, G::GraphNodeId(FunId), 
Data, Ret); - } - } - } - - return Ret; -} - -template -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCCallers -computeSCCCallers(const G &Graph, - const SCCHolder &SCCs); - -template -auto computeSCCCallers(const G &Graph, - const SCCHolder &SCCs) - -> SCCCallers { - SCCCallers Ret; - Ret.ChildrenOfSCC.resize(SCCs.NumSCCs); - - llvm::SmallBitVector Roots(SCCs.NumSCCs, true); - - size_t NodeId = 0; - for (const auto &SuccNodes : Graph.Adj) { - auto SrcSCC = SCCs.SCCOfNode[NodeId]; - - for (auto SuccNode : SuccNodes) { - auto DestSCC = SCCs.SCCOfNode[size_t(SuccNode)]; - if (DestSCC != SrcSCC) { - Ret.ChildrenOfSCC[size_t(SrcSCC)].insert(DestSCC); - Roots.reset(uint32_t(DestSCC)); - } - } - - ++NodeId; - } - - Ret.SCCRoots.reserve(Roots.count()); - for (auto Rt : Roots.set_bits()) { - Ret.SCCRoots.push_back(SCCId(Rt)); - } - - return Ret; -} - -template -[[nodiscard]] LLVM_LIBRARY_VISIBILITY SCCOrder -computeSCCOrder(const SCCHolder &SCCs, - const SCCCallers &Callers); -template -inline auto computeSCCOrder(const SCCHolder &SCCs, - const SCCCallers &Callers) - -> SCCOrder { - SCCOrder Ret; - Ret.SCCIds.reserve(SCCs.NumSCCs); - - llvm::SmallBitVector Seen; - Seen.resize(SCCs.NumSCCs); - - auto Dfs = [&](auto &Dfs, SCCId CurrSCC) -> void { - Seen.set(uint32_t(CurrSCC)); - for (auto Caller : Callers.ChildrenOfSCC[size_t(CurrSCC)]) { - if (!Seen.test(uint32_t(Caller))) { - Dfs(Dfs, Caller); - } - } - Ret.SCCIds.push_back(CurrSCC); - }; - - for (auto Leaf : Callers.SCCRoots) { - if (!Seen.test(uint32_t(Leaf))) { - Dfs(Dfs, Leaf); - } - } - - std::reverse(Ret.SCCIds.begin(), Ret.SCCIds.end()); - - return Ret; -} -} // namespace psr::analysis::call_graph - -namespace llvm { -template <> struct DenseMapInfo { - using SCCId = psr::analysis::call_graph::SCCId; - - static inline SCCId getEmptyKey() noexcept { return SCCId(-1); } - static inline SCCId getTombstoneKey() noexcept { return SCCId(-2); } - static inline auto getHashValue(SCCId Id) noexcept { - return 
llvm::hash_value(uint32_t(Id)); - } - static inline bool isEqual(SCCId L, SCCId R) noexcept { return L == R; } -}; -} // namespace llvm diff --git a/include/phasar/Utils.h b/include/phasar/Utils.h index f120b4faa0..c754608a0a 100644 --- a/include/phasar/Utils.h +++ b/include/phasar/Utils.h @@ -11,6 +11,7 @@ #define PHASAR_UTILS_H #include "phasar/Utils/AnalysisProperties.h" +#include "phasar/Utils/BitSet.h" #include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/BoxedPointer.h" #include "phasar/Utils/ByRef.h" @@ -28,6 +29,7 @@ #include "phasar/Utils/Nullable.h" #include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Printer.h" +#include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/StableVector.h" #include "phasar/Utils/Table.h" diff --git a/include/phasar/Utils/AdjacencyList.h b/include/phasar/Utils/AdjacencyList.h index 890aaf837d..d9e44560e6 100644 --- a/include/phasar/Utils/AdjacencyList.h +++ b/include/phasar/Utils/AdjacencyList.h @@ -40,7 +40,7 @@ struct AdjacencyList { }; /// A simple graph implementation based on an adjacency list -template +template struct GraphTraits> { using graph_type = AdjacencyList; using value_type = T; diff --git a/include/phasar/Utils/BitSet.h b/include/phasar/Utils/BitSet.h index 6d418994ca..03023af337 100644 --- a/include/phasar/Utils/BitSet.h +++ b/include/phasar/Utils/BitSet.h @@ -19,17 +19,6 @@ namespace psr { -namespace internal { -inline llvm::ArrayRef getWords(const llvm::BitVector &BV, - uintptr_t & /*Store*/) { - return BV.getData(); -} -inline llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, - uintptr_t &Store) { - return BV.getData(Store); -} -} // namespace internal - /// \brief A set-type that can compactly store sets of sequential integer-like /// types. /// @@ -41,6 +30,15 @@ inline llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, /// \tparam BitVectorTy The underlying bit-vector to use. Must be either /// llvm::BitVector or llvm::SmallBitVector. 
template class BitSet { + static llvm::ArrayRef getWords(const llvm::BitVector &BV, + uintptr_t & /*Store*/) { + return BV.getData(); + } + static llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, + uintptr_t &Store) { + return BV.getData(Store); + } + public: /// Wraps BitVectorTy::const_set_bits_iterator, as LLVM's bitset iterators /// unfortunately do not conform to the named requirement of an iterator @@ -153,8 +151,8 @@ template class BitSet { uintptr_t LhsStore{}; uintptr_t RhsStore{}; - auto LhsWords = internal::getWords(Lhs.Bits, LhsStore); - auto RhsWords = internal::getWords(Rhs.Bits, RhsStore); + auto LhsWords = getWords(Lhs.Bits, LhsStore); + auto RhsWords = getWords(Rhs.Bits, RhsStore); if (LhsWords.size() == RhsWords.size()) { return LhsWords == RhsWords; } @@ -207,8 +205,8 @@ template class BitSet { uintptr_t Buf = 0; uintptr_t OfBuf = 0; - auto Words = internal::getWords(Bits, Buf); - auto OfWords = internal::getWords(Of.Bits, OfBuf); + auto Words = getWords(Bits, Buf); + auto OfWords = getWords(Of.Bits, OfBuf); if (Words.size() > OfWords.size()) { if (llvm::any_of(Words.drop_front(OfWords.size()), [](uintptr_t W) { return W != 0; })) { diff --git a/include/phasar/Utils/IO.h b/include/phasar/Utils/IO.h index 27669460b3..081332b9a1 100644 --- a/include/phasar/Utils/IO.h +++ b/include/phasar/Utils/IO.h @@ -22,7 +22,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "nlohmann/json.hpp" +#include "nlohmann/json_fwd.hpp" #include diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index b430161b97..e1bdad5bb4 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -15,8 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" -#include "nlohmann/json.hpp" - #include #include #include @@ -315,10 +313,11 @@ constexpr size_t variant_idx = detail::variant_idx::value; template using ElementType = typename detail::ElementType::type; 
-template +template struct has_getAsJson : std::false_type {}; // NOLINT template -struct has_getAsJson().getAsJson())> +struct has_getAsJson().getAsJson())>> : std::true_type {}; // NOLINT struct TrueFn { diff --git a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm index 271967a5b2..0b5be19763 100644 --- a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm +++ b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm @@ -8,6 +8,7 @@ module; #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" #include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" @@ -45,4 +46,5 @@ using psr::SparseLLVMBasedCFGProvider; using psr::SparseLLVMBasedICFG; using psr::SparseLLVMBasedICFGView; using psr::valueOf; +using psr::VTAResolver; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp index cffe27cb89..91aa57504b 100644 --- a/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp +++ b/lib/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/IRBuilder.h" #include +#include namespace psr { template diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index df4464458f..5716ddd4be 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -17,8 +17,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/ControlFlow/CallGraphAnalysisType.h" -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" -#include 
"phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" diff --git a/lib/Utils/IO.cpp b/lib/Utils/IO.cpp index dad2e4a70f..b016db47d4 100644 --- a/lib/Utils/IO.cpp +++ b/lib/Utils/IO.cpp @@ -18,7 +18,6 @@ #include "phasar/Utils/ErrorHandling.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/Utilities.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/lib/Utils/Utils.cppm b/lib/Utils/Utils.cppm index 9440d6c5dd..7f42cf169f 100644 --- a/lib/Utils/Utils.cppm +++ b/lib/Utils/Utils.cppm @@ -32,6 +32,7 @@ module; #include "phasar/Utils/PointerUtils.h" #include "phasar/Utils/Printer.h" #include "phasar/Utils/RepeatIterator.h" +#include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/SemiRing.h" #include "phasar/Utils/Soundness.h" #include "phasar/Utils/StableVector.h" @@ -93,11 +94,14 @@ using psr::hasFlag; using psr::InitPhasar; using psr::iota; using psr::IotaIterator; +using psr::is_const_graph; using psr::is_graph; using psr::is_graph_edge; using psr::is_graph_trait; using psr::is_removable_graph_trait_v; using psr::is_reservable_graph_trait_v; +using psr::is_weighted_const_graph; +using psr::is_weighted_graph; using psr::JoinLattice; using psr::JoinLatticeTraits; using psr::Logger; @@ -143,6 +147,10 @@ using psr::AreEqualityComparable; using psr::assertAllNotNull; using psr::assertNotNull; using psr::computePowerSet; +using psr::computeSCCDependencies; +using psr::computeSCCIterative; +using psr::computeSCCOrder; +using psr::computeSCCs; using psr::createTimeStamp; using psr::DefaultConstruct; using psr::DenseSet; @@ -167,6 +175,8 @@ using psr::IdentityFn; using psr::IgnoreArgs; using psr::intersectWith; using psr::is_crtp_base_of_v; +using psr::is_explicitly_convertible_to; +using psr::is_incrementable; using 
psr::is_iterable_over_v; using psr::is_iterable_v; using psr::is_llvm_hashable_v; @@ -181,10 +191,15 @@ using psr::is_variant; using psr::is_variant_v; using psr::isConstructor; using psr::IsEqualityComparable; +using psr::IsLessComparable; using psr::isMangled; using psr::Overloaded; using psr::remove_by_index; using psr::reserveIfPossible; +using psr::SCCDependencyGraph; +using psr::SCCHolder; +using psr::SCCId; +using psr::SCCOrder; using psr::scope_exit; using psr::SmallDenseTable1d; using psr::StableVector; @@ -195,5 +210,4 @@ using psr::TrueFn; using psr::UnorderedSet; using psr::UnorderedTable1d; using psr::variant_idx; -// using psr::variant_idx; } // namespace psr diff --git a/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt b/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt index 20c6350dbb..680b7aa647 100644 --- a/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt +++ b/unittests/PhasarLLVM/ControlFlow/CMakeLists.txt @@ -1,20 +1,20 @@ set(ControlFlowSources - LLVMBasedCFGTest.cpp - LLVMBasedICFGTest.cpp - LLVMBasedICFG_CHATest.cpp - LLVMBasedICFG_OTFTest.cpp - LLVMBasedICFG_RTATest.cpp - LLVMBasedICFG_RTA_MultipleInheritanceTest.cpp - LLVMBasedBackwardCFGTest.cpp - LLVMBasedBackwardICFGTest.cpp - LLVMBasedICFGExportTest.cpp - LLVMBasedICFGGlobCtorDtorTest.cpp - LLVMBasedICFGSerializationTest.cpp - LLVMVFTableProviderTest.cpp + LLVMBasedCFGTest.cpp + LLVMBasedICFGTest.cpp + LLVMBasedICFG_CHATest.cpp + LLVMBasedICFG_OTFTest.cpp + LLVMBasedICFG_RTATest.cpp + LLVMBasedICFG_RTA_MultipleInheritanceTest.cpp + LLVMBasedBackwardCFGTest.cpp + LLVMBasedBackwardICFGTest.cpp + LLVMBasedICFGExportTest.cpp + LLVMBasedICFGGlobCtorDtorTest.cpp + LLVMBasedICFGSerializationTest.cpp + LLVMVFTableProviderTest.cpp VTACallGraphTest.cpp ) set(LLVM_LINK_COMPONENTS Linker) # The CtorDtorTest needs the linker foreach(TEST_SRC ${ControlFlowSources}) - add_phasar_unittest(${TEST_SRC}) + add_phasar_unittest(${TEST_SRC}) endforeach(TEST_SRC) diff --git a/unittests/Utils/SCCGenericTest.cpp 
b/unittests/Utils/SCCGenericTest.cpp index e731fdde1d..b87e11b910 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -19,7 +19,7 @@ #include //===----------------------------------------------------------------------===// -// Unit tests for the Igeneric SCC algorithm +// Unit tests for the generic SCC algorithm using namespace psr; @@ -53,7 +53,7 @@ static void computeSCCsAndCompare(ExampleGraph &Graph) { } #if __cplusplus >= 202002L - auto SCCDeps = computeSCCDependencies(Graph, OutputRec); + [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, OutputRec); static_assert(is_const_graph); #endif } From a344ef2f4f1b68577902e79ab11f88f0e5be2a55 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 14 Sep 2025 15:38:57 +0200 Subject: [PATCH 19/27] Add ground-truth to SCCGenericTest + fix error in Compressor introduced by merge + mino --- include/phasar/Utils/AdjacencyList.h | 5 - include/phasar/Utils/Compressor.h | 2 +- include/phasar/Utils/GraphTraits.h | 12 +- include/phasar/Utils/SCCGeneric.h | 65 ++++++++- unittests/Utils/SCCGenericTest.cpp | 206 +++++++++++++++++---------- 5 files changed, 200 insertions(+), 90 deletions(-) diff --git a/include/phasar/Utils/AdjacencyList.h b/include/phasar/Utils/AdjacencyList.h index d9e44560e6..5f9f3616a9 100644 --- a/include/phasar/Utils/AdjacencyList.h +++ b/include/phasar/Utils/AdjacencyList.h @@ -306,11 +306,6 @@ struct GraphTraits> { G.Roots.pop_back(); return It; } - -#if __cplusplus >= 202002L - static_assert(is_graph); -#endif - static_assert(is_reservable_graph_trait_v); }; } // namespace psr diff --git a/include/phasar/Utils/Compressor.h b/include/phasar/Utils/Compressor.h index 74512e045e..8a99eef817 100644 --- a/include/phasar/Utils/Compressor.h +++ b/include/phasar/Utils/Compressor.h @@ -49,7 +49,7 @@ class Compressor>> { } std::pair insert(T Elem) { - auto [It, Inserted] = ToInt.try_emplace(Elem, Id(ToInt.size())); + auto [It, Inserted] = ToInt.try_emplace(Elem, 
IdT(ToInt.size())); if (Inserted) { FromInt.push_back(Elem); } diff --git a/include/phasar/Utils/GraphTraits.h b/include/phasar/Utils/GraphTraits.h index d4d795e063..04e6f1d501 100644 --- a/include/phasar/Utils/GraphTraits.h +++ b/include/phasar/Utils/GraphTraits.h @@ -232,6 +232,13 @@ struct DefaultNodeTransform { } }; +/// \brief Prints the given graph G as dot. +/// +/// \param G The graph to print +/// \param OS The output-stream, where to print into +/// \param Name The name of the graph +/// \param NodeToString If the graph has node-labels, convert a node-label to +/// string template void printGraph(const GraphTy &G, llvm::raw_ostream &OS, llvm::StringRef Name = "", NodeTransform NodeToString = {}) @@ -241,11 +248,10 @@ void printGraph(const GraphTy &G, llvm::raw_ostream &OS, { using traits_t = GraphTraits; - OS << "digraph " << Name << " {\n"; + OS << "digraph \""; + OS.write_escaped(Name) << "\" {\n"; psr::scope_exit CloseBrace = [&OS] { OS << "}\n"; }; - auto Sz = traits_t::size(G); - for (auto Vtx : traits_t::vertices(G)) { OS << size_t(Vtx); if constexpr (!std::is_empty_v) { diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index cffb050946..6c4a70fbec 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -28,7 +28,8 @@ namespace psr { namespace detail { -// Unfortunately, `enum class` cannot be templated... +// Unfortunately, `enum class` cannot be templated, but we want type-safety for +// SCC-IDs... struct SCCIdBase { uint32_t Value{}; @@ -94,6 +95,60 @@ template struct SCCHolder { /// Number of SCCs [[nodiscard]] size_t size() const noexcept { return NodesInSCC.size(); } [[nodiscard]] bool empty() const noexcept { return NodesInSCC.empty(); } + + /// \brief Prints the given Graph as dot, highlighting the SCCs in the graph. 
+ /// + /// \param Graph The graph to print + /// \param OS The output-stream, where to print into + /// \param Name The name of the graph + /// \param NodeToString If the graph has node-labels, convert a node-label to + /// string + template ::vertex_t, GraphNodeId>, + int> = 0> +#if __cplusplus >= 202002L + requires is_const_graph +#endif + void print(const G &Graph, llvm::raw_ostream &OS, llvm::StringRef Name = "", + NodeTransform NodeToString = {}) const { + OS << "digraph \""; + OS.write_escaped(Name) << "\" {\n"; + psr::scope_exit CloseBrace = [&] { OS << "}\n"; }; + + using GTraits = psr::GraphTraits; + + for (const auto &[SCCId, SCC] : NodesInSCC.enumerate()) { + OS << " subgraph cluster_" << +SCCId + << "{\n node [style=filled]; color=blue; label=\"SCC " << +SCCId + << "\";\n"; + psr::scope_exit CloseSCC = [&] { OS << " }\n"; }; + + for (auto Nod : SCC) { + OS << " " << size_t(Nod); + if constexpr (!std::is_empty_v) { + OS << "[label=\""; + OS.write_escaped( + std::invoke(NodeToString, GTraits::node(Graph, Nod))); + OS << "\"]"; + } + OS << ";\n"; + } + } + + for (auto FromVtx : GTraits::vertices(Graph)) { + for (const auto &Succ : GTraits::outEdges(Graph, FromVtx)) { + OS << " " << size_t(FromVtx) << "->"; + if constexpr (is_llvm_printable_v) { + // to print the edge-weight as well, if possible + OS << Succ; + } else { + OS << size_t(GTraits::target(Succ)); + } + OS << ";\n"; + } + } + } }; /// \brief Holds a graph where the SCCs are collapsed to a single node. @@ -191,7 +246,7 @@ template struct SCCDataIt : SCCData { using SCCData::SCCData; }; -constexpr void setMin(uint32_t &InOut, uint32_t Other) noexcept { +template constexpr void setMin(T &InOut, T Other) noexcept { if (Other < InOut) { InOut = Other; } @@ -378,7 +433,7 @@ computeSCCIterative(const G &Graph) { InStack.insert(W); } else if (InStack.contains(W)) { // w is in the current DFS path; update lowlink. 
- Lowlink[U] = std::min(Lowlink[U], Dfn[W]); + detail::setMin(Lowlink[U], Dfn[W]); } } else { // Done exploring u. @@ -391,7 +446,7 @@ computeSCCIterative(const G &Graph) { S.pop_back(); InStack.erase(W); // Assign w the current SCC id. - Holder.SCCOfNode[W] = static_cast(Holder.size()); + Holder.SCCOfNode[W] = SCCId(Holder.size()); Comp.push_back(W); } while (W != U); } @@ -399,7 +454,7 @@ computeSCCIterative(const G &Graph) { if (!DfsStack.empty()) { // After returning, update the parent's lowlink. VertexTy Parent = DfsStack.back().V; - Lowlink[Parent] = std::min(Lowlink[Parent], Lowlink[U]); + detail::setMin(Lowlink[Parent], Lowlink[U]); } } } diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index b87e11b910..adb22604db 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -12,8 +12,11 @@ #include "phasar/Utils/AdjacencyList.h" #include "phasar/Utils/EmptyBaseOptimizationUtils.h" #include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/TypedVector.h" +#include "llvm/ADT/ArrayRef.h" + #include "gtest/gtest.h" #include @@ -27,97 +30,148 @@ enum class NodeId : uint32_t {}; using ExampleGraph = AdjacencyList; -static void computeSCCsAndCompare(ExampleGraph &Graph) { - auto OutputRec = computeSCCs(Graph); - auto OutputIt = computeSCCIterative(Graph); - ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) - << "Iterative Approach did not reach all nodes\n"; - ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) - << "Recursive Approach did not reach all nodes\n"; - ASSERT_EQ(OutputRec.size(), OutputIt.size()) +static SCCHolder makeGTSCCs(llvm::ArrayRef> SCCs) { + SCCHolder Ret; + + uint32_t Ctr = 0; + for (const auto &SCC : SCCs) { + auto CurrSCC = SCCId(Ctr++); + auto &NodesInSCC = Ret.NodesInSCC.emplace_back(); + for (auto Nod : SCC) { + NodesInSCC.push_back(NodeId(Nod)); + + if (Ret.SCCOfNode.size() <= size_t(Nod)) { + Ret.SCCOfNode.resize(Nod 
+ 1); + } + + Ret.SCCOfNode[NodeId(Nod)] = CurrSCC; + } + } + + return Ret; +}; + +static void compareSCCs(const SCCHolder &ComputedSCCs, + const SCCHolder &ExpectedSCCs, + std::string_view ComputedName) { + ASSERT_EQ(ComputedSCCs.size(), ExpectedSCCs.size()) << "Unequal number of SCC components\n"; + ASSERT_EQ(ComputedSCCs.SCCOfNode.size(), ExpectedSCCs.SCCOfNode.size()) + << "Unequal number of Graph Nodes\n"; const auto None = SCCId(UINT32_MAX); - TypedVector, SCCId> Isomorphism(OutputRec.size(), None); + TypedVector, SCCId> Isomorphism(ComputedSCCs.size(), + None); - for (auto Vtx : GraphTraits::vertices(Graph)) { - auto RecSCC = OutputRec.SCCOfNode[Vtx]; - auto ItSCC = OutputIt.SCCOfNode[Vtx]; + for (auto Vtx : iota(ComputedSCCs.SCCOfNode.size())) { + auto ExpectedSCC = ExpectedSCCs.SCCOfNode[Vtx]; + auto ComputedSCC = ComputedSCCs.SCCOfNode[Vtx]; - if (Isomorphism[RecSCC] == None) { - Isomorphism[RecSCC] = ItSCC; + if (Isomorphism[ExpectedSCC] == None) { + Isomorphism[ExpectedSCC] = ComputedSCC; } else { - EXPECT_EQ(Isomorphism[RecSCC], ItSCC) - << "SCCs differ at Index: " << uint32_t(Vtx) << "\n"; + EXPECT_EQ(Isomorphism[ExpectedSCC], ComputedSCC) + << "SCCs differ for node: " << uint32_t(Vtx) << " in " + << ComputedName; } } +} + +static void computeSCCsAndCompare(ExampleGraph &Graph, + llvm::ArrayRef> ExpectedSCCs) { + + auto OutputRec = computeSCCs(Graph); + auto OutputIt = computeSCCIterative(Graph); + ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) + << "Iterative Approach did not reach all nodes\n"; + ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) + << "Recursive Approach did not reach all nodes\n"; #if __cplusplus >= 202002L [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, OutputRec); static_assert(is_const_graph); #endif + + auto GroundTruth = makeGTSCCs(ExpectedSCCs); + compareSCCs(OutputRec, GroundTruth, "RecursiveTarjan"); + compareSCCs(OutputIt, GroundTruth, "IterativeTarjan"); + + // printGraph(Graph, llvm::outs(), 
"ExampleGraph"); + OutputRec.print(Graph, llvm::outs(), "ExampleGraph"); } -TEST(SCCGenericTest, SCCTest) { - ExampleGraph GraphOne{{{NodeId(2)}, - {NodeId(0)}, - {NodeId(1)}, - {NodeId(1), NodeId(2)}, - {NodeId(1)}, - {NodeId(4), NodeId(6)}, - {NodeId(4), NodeId(7)}, - {NodeId(5)}}}; - - ExampleGraph GraphTwo{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}; - - ExampleGraph GraphThree{{{NodeId(1)}, - {NodeId(2)}, - {NodeId(3)}, - {NodeId(4)}, - {NodeId(5)}, - {NodeId(6)}, - {NodeId(0)}}}; - - ExampleGraph GraphFour{{{NodeId(1), NodeId(2), NodeId(3), NodeId(4)}, - {NodeId(0), NodeId(2), NodeId(3), NodeId(4)}, - {NodeId(0), NodeId(1), NodeId(3), NodeId(4)}, - {NodeId(0), NodeId(1), NodeId(2), NodeId(4)}, - {NodeId(0), NodeId(1), NodeId(2), NodeId(3)}}}; - - ExampleGraph GraphFive{{{NodeId(1)}, - {NodeId(2)}, - {NodeId(3), NodeId(4)}, - {NodeId(5)}, - {NodeId(5)}, - {NodeId(2), NodeId(6)}, - {NodeId(7)}, - {NodeId(1), NodeId(8)}, - {}}}; - - ExampleGraph GraphSix{{{NodeId(1)}, - {NodeId(2)}, - {NodeId(3)}, - {NodeId(4)}, - {NodeId(5)}, - {NodeId(6)}, - {NodeId(7)}, - {NodeId(0)}, - {NodeId(9)}, - {NodeId(10)}, - {NodeId(11)}, - {NodeId(12)}, - {NodeId(13), NodeId(4)}, - {NodeId(8)}, - {NodeId(9)}, - {NodeId(3)}, - {NodeId(5)}}}; - - std::vector TestGraphs = {GraphOne, GraphTwo, GraphThree, - GraphFour, GraphFive, GraphSix}; - - for (auto &TestGraph : TestGraphs) { - computeSCCsAndCompare(TestGraph); - } +TEST(SCCGenericTest, SCCTest01) { + ExampleGraph Graph{{{NodeId(2)}, + {NodeId(0)}, + {NodeId(1)}, + {NodeId(1), NodeId(2)}, + {NodeId(1)}, + {NodeId(4), NodeId(6)}, + {NodeId(4), NodeId(7)}, + {NodeId(5)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3}, {4}, {5, 6, 7}}); +} + +TEST(SCCGenericTest, SCCTest02) { + ExampleGraph Graph{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}; + computeSCCsAndCompare(Graph, + {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}}); +} + +TEST(SCCGenericTest, SCCTest03) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + 
{NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(0)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4, 5, 6}}); +} + +TEST(SCCGenericTest, SCCTest04) { + ExampleGraph Graph{{{NodeId(1), NodeId(2), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(2), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(3), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(2), NodeId(4)}, + {NodeId(0), NodeId(1), NodeId(2), NodeId(3)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}}); +} + +TEST(SCCGenericTest, SCCTest05) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3), NodeId(4)}, + {NodeId(5)}, + {NodeId(5)}, + {NodeId(2), NodeId(6)}, + {NodeId(7)}, + {NodeId(1), NodeId(8)}, + {}}}; + computeSCCsAndCompare(Graph, {{0}, {1, 2, 3, 4, 5, 6, 7}, {8}}); +} + +TEST(SCCGenericTest, SCCTest06) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(7)}, + {NodeId(0)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13), NodeId(4)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(3)}, + {NodeId(5)}}}; + computeSCCsAndCompare( + Graph, + {{0, 1, 2, 3, 4, 5, 6, 7}, {8, 9, 10, 11, 12, 13}, {14}, {15}, {16}}); } // main function for the test case From 8ae6a145c4c87a9909bf159068e4374802493f6c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 14 Sep 2025 16:34:42 +0200 Subject: [PATCH 20/27] Some cleanup + some comments --- .../ControlFlow/LLVMBasedCallGraphBuilder.h | 49 ++++++++++++++-- .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 8 ++- .../ControlFlow/Resolver/Resolver.h | 28 +++++---- .../ControlFlow/Resolver/VTAResolver.h | 27 ++------- .../ControlFlow/VTA/TypeAssignmentGraph.h | 2 +- include/phasar/Utils/SCCGeneric.h | 2 +- .../ControlFlow/Resolver/Resolver.cpp | 34 +++++------ .../ControlFlow/Resolver/VTAResolver.cpp | 58 +++++++++++-------- .../ControlFlow/VTA/TypeAssignmentGraph.cpp | 13 ++--- unittests/Utils/SCCGenericTest.cpp | 2 +- 10 files changed, 131 insertions(+), 92 
deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h index 1679e7b5cd..1503b1d318 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h @@ -21,29 +21,66 @@ class DIBasedTypeHierarchy; class LLVMVFTableProvider; class Resolver; +/// Constructs a call-graph using the given CGResolver to resolve indirect +/// calls. +/// +/// Uses a fixpoint iteration, if +/// `CGResolver.mutatesHelperAnalysisInformation()` returns true and the +/// soundness S is not Soundness::Unsound. +/// +/// \param IRDB The IR code where the call-graph should be based on +/// \param CGResolver The resolver to use for resolving indirect calls. +/// \param EntryPoints The functions, where the call-graph construction should +/// start. The resulting call-graph will only contain functions that are +/// (transitively) reachable from the entry-points. +/// \param S The soundness level. May be used to trade soundness for +/// performance. [[nodiscard]] LLVMBasedCallGraph -buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, +buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, llvm::ArrayRef EntryPoints, - DIBasedTypeHierarchy &TH, LLVMVFTableProvider &VTP, - LLVMAliasInfoRef PT = nullptr, Soundness S = Soundness::Soundy); +/// Constructs a call-graph using the given CGResolver to resolve indirect +/// calls. +/// +/// Uses a fixpoint iteration, if +/// `CGResolver.mutatesHelperAnalysisInformation()` returns true and the +/// soundness S is not Soundness::Unsound. +/// +/// \param IRDB The IR code where the call-graph should be based on +/// \param CGResolver The resolver to use for resolving indirect calls. +/// \param EntryPoints Names of the functions, where the call-graph construction +/// should start. 
The resulting call-graph will only contain functions that are +/// (transitively) reachable from the entry-points. +/// \param S The soundness level. May be used to trade soundness for +/// performance. [[nodiscard]] LLVMBasedCallGraph buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, - llvm::ArrayRef EntryPoints, + llvm::ArrayRef EntryPoints, Soundness S = Soundness::Soundy); +/// Kept for compatibility with LLVMBasedICFG. See the constructor of +/// LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *, CallGraphAnalysisType, +/// llvm::ArrayRef, DIBasedTypeHierarchy *, LLVMAliasInfoRef, +/// Soundness, bool) for more information. [[nodiscard]] LLVMBasedCallGraph buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, - llvm::ArrayRef EntryPoints, + llvm::ArrayRef EntryPoints, DIBasedTypeHierarchy &TH, LLVMVFTableProvider &VTP, LLVMAliasInfoRef PT = nullptr, Soundness S = Soundness::Soundy); +/// Kept for compatibility with LLVMBasedICFG. See the constructor of +/// LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *, CallGraphAnalysisType, +/// llvm::ArrayRef, DIBasedTypeHierarchy *, LLVMAliasInfoRef, +/// Soundness, bool) for more information. 
[[nodiscard]] LLVMBasedCallGraph -buildLLVMBasedCallGraph(const LLVMProjectIRDB &IRDB, Resolver &CGResolver, +buildLLVMBasedCallGraph(LLVMProjectIRDB &IRDB, CallGraphAnalysisType CGType, llvm::ArrayRef EntryPoints, + DIBasedTypeHierarchy &TH, LLVMVFTableProvider &VTP, + LLVMAliasInfoRef PT = nullptr, Soundness S = Soundness::Soundy); + } // namespace psr #endif // PHASAR_PHASARLLVM_CONTROLFLOW_LLVMBASEDCALLGRAPHBUILDER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index b14ca90f75..34984e0358 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -61,10 +61,12 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { /// \param EntryPoints The names of the functions to start with when /// incrementally building up the ICFG. For whole-program analysis of an /// executable use {"main"}. - /// \param TH The type-hierarchy implementation to use. Will be constructed - /// on-the-fly if nullptr, but required + /// \param TH The type-hierarchy implementation to use. Must be non-null, if + /// the selected call-graph analysis requires type-hierarchy information; + /// currently, this holds for the CHA and RTA algorithms. /// \param PT The points-to implementation to use. Will be constructed - /// on-the-fly if nullptr, but required + /// on-the-fly if nullptr, but required; currently, this holds for the OTF and + /// VTA algorithms. /// \param S The soundness level to expect from the analysis. Currently unused /// \param IncludeGlobals Properly include global constructors/destructors /// into the ICFG, if true. 
Requires to generate artificial functions into the diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 81b499af5f..b3b4366d86 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -124,21 +124,29 @@ class Resolver { [[nodiscard]] llvm::ArrayRef getAddressTakenFunctions(); - struct DefaultBaseResolverProvider { - MaybeUniquePtr operator()(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT); - }; + using BaseResolverProvider = llvm::function_ref( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT)>; + /// Factory function to create a Resolver that can be used to implement the + /// given call-graph analysis type. + /// + /// \param Ty Determines the Resolver subclass to instantiate + /// \param IRDB The IR code where the Resolver should be based on. Must not be + /// nullptr. + /// \param VTP A virtual-table-provides that is used to extract C++-VTables + /// from the IR. Must not be nullptr. + /// \param TH The type-hierarchy implementation to use. Must be non-null, if + /// the selected call-graph analysis requires type-hierarchy information; + /// currently, this holds for the CHA and RTA algorithms. + /// \param PT The points-to implementation to use. Will be constructed + /// on-the-fly if nullptr, but required; currently, this holds for the OTF and + /// VTA algorithms. 
static std::unique_ptr create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT = nullptr, - llvm::function_ref( - const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT)> - GetBaseRes = DefaultBaseResolverProvider{}); + BaseResolverProvider GetBaseRes = nullptr); protected: virtual void resolveVirtualCall(FunctionSetTy &PossibleTargets, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index d1d72849e3..85bdb5f021 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -18,7 +18,6 @@ #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/SCCGeneric.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" namespace psr { @@ -40,7 +39,7 @@ class VTAResolver : public Resolver { /// Constructs a VTAResolver with a given pre-computed call-graph and /// call-back based alias-information (to-be-replaced by AliasIterator once - /// available) + /// available #783) /// /// Builds the type-assignment graph and propagates allocated types though /// it's SCCs. @@ -55,19 +54,11 @@ class VTAResolver : public Resolver { /// it's SCCs. explicit VTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, - MaybeUniquePtr BaseCG) - : VTAResolver( - IRDB, VTP, - [AS](const llvm::Value *Ptr, const llvm::Instruction *At, - vta::AliasHandlerTy WithAlias) { - auto ASet = AS.getAliasSet(Ptr, At); - llvm::for_each(*ASet, WithAlias); - }, - std::move(BaseCG)) {} + MaybeUniquePtr BaseCG); /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) /// and call-back based alias-information (to-be-replaced by AliasIterator - /// once available). + /// once available #783). 
/// Uses the optional parameter ReachableFunctions to consider only a subset /// of all functions for building the type-assignment graph /// @@ -92,15 +83,7 @@ class VTAResolver : public Resolver { LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, llvm::function_ref)> - ReachableFunctions = DefaultReachableFunctions{}) - : VTAResolver( - IRDB, VTP, - [AS](const llvm::Value *Ptr, const llvm::Instruction *At, - vta::AliasHandlerTy WithAlias) { - auto ASet = AS.getAliasSet(Ptr, At); - llvm::for_each(*ASet, WithAlias); - }, - std::move(BaseRes), ReachableFunctions) {} + ReachableFunctions = DefaultReachableFunctions{}); [[nodiscard]] std::string str() const override; @@ -116,7 +99,7 @@ class VTAResolver : public Resolver { void resolveFunctionPointer(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) override; - MaybeUniquePtr BaseCG{}; + MaybeUniquePtr BaseResolver{}; vta::TypeAssignment TA{}; SCCHolder SCCs{}; Compressor Nodes; diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index 2e6dc2a229..9a90188f53 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -159,7 +159,7 @@ using ReachableFunsHandlerTy = llvm::function_ref; using ReachableFunsTy = llvm::function_ref; -// TODO: Use AliasIterator here, once available +// TODO: Use AliasIterator here, once available #783 [[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( const LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, AliasInfoTy AS, Resolver &BaseRes, ReachableFunsTy ReachableFunctions); diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 6c4a70fbec..1443adfbf4 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -345,7 +345,7 @@ computeSCCs(const G &Graph) { /// \attention Largely generated by FhGenie GPT o3 Mini, so use 
with caution! template SCCHolder>::vertex_t> -computeSCCIterative(const G &Graph) { +computeSCCsIterative(const G &Graph) { using GTraits = GraphTraits>; using VertexTy = typename GTraits::vertex_t; using EdgeTy = typename GTraits::edge_t; diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index ca093d6390..ad44f3899d 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -252,9 +252,11 @@ auto Resolver::resolveIndirectCall(const llvm::CallBase *CallSite) FunctionSetTy PossibleTargets; if (VTP && isVirtualCall(CallSite, *VTP)) { resolveVirtualCall(PossibleTargets, CallSite); - } - - if (PossibleTargets.empty()) { + } else { + // Note: Don't use resolveFunctionPointer() as fallback when + // resolveVirtualCall() does not find callees, because this will break the + // fixpoint computation when using the OTFResolver. Resolvers should install + // a meaningful fallback themselves, if necessary. 
resolveFunctionPointer(PossibleTargets, CallSite); } @@ -293,20 +295,10 @@ void Resolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, void Resolver::otherInst(const llvm::Instruction *Inst) {} -MaybeUniquePtr Resolver::DefaultBaseResolverProvider::operator()( - const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef /*PT*/) { - return std::make_unique(IRDB, VTP, TH); -} - -std::unique_ptr Resolver::create( - CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT, - llvm::function_ref( - const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT)> - GetBaseRes) { +std::unique_ptr +Resolver::create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT, BaseResolverProvider GetBaseRes) { assert(IRDB != nullptr); assert(VTP != nullptr); @@ -321,7 +313,13 @@ std::unique_ptr Resolver::create( return std::make_unique(IRDB, VTP, TH); case CallGraphAnalysisType::VTA: { assert(PT); - auto BaseRes = GetBaseRes(IRDB, VTP, TH, PT); + auto BaseRes = [&]() -> MaybeUniquePtr { + if (!GetBaseRes) { + return std::make_unique(IRDB, VTP, TH); + } + + return GetBaseRes(IRDB, VTP, TH, PT); + }(); assert(BaseRes != nullptr); return std::make_unique(IRDB, VTP, PT, std::move(BaseRes)); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp index 3f90a68e2b..9a54b764ef 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -48,11 +48,11 @@ VTAResolver::VTAResolver( llvm::function_ref)> ReachableFunctions) - : Resolver(IRDB, VTP), BaseCG(std::move(BaseRes)) { - assert(this->BaseCG != nullptr); + : Resolver(IRDB, VTP), BaseResolver(std::move(BaseRes)) { + 
assert(this->BaseResolver != nullptr); - auto TAG = vta::computeTypeAssignmentGraph(*IRDB, *VTP, AS, *this->BaseCG, - ReachableFunctions); + auto TAG = vta::computeTypeAssignmentGraph( + *IRDB, *VTP, AS, *this->BaseResolver, ReachableFunctions); SCCs = computeSCCs(TAG); auto Deps = computeSCCDependencies(TAG, SCCs); @@ -65,15 +65,38 @@ VTAResolver::VTAResolver( Nodes = std::move(TAG.Nodes); } +VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, + MaybeUniquePtr BaseCG) + : VTAResolver( + IRDB, VTP, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }, + std::move(BaseCG)) {} + +VTAResolver::VTAResolver( + const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, + LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, + llvm::function_ref)> + ReachableFunctions) + : VTAResolver( + IRDB, VTP, + [AS](const llvm::Value *Ptr, const llvm::Instruction *At, + vta::AliasHandlerTy WithAlias) { + auto ASet = AS.getAliasSet(Ptr, At); + llvm::for_each(*ASet, WithAlias); + }, + std::move(BaseRes), ReachableFunctions) {} + std::string VTAResolver::str() const { return "VTA"; } void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) { - // llvm::errs() << "[resolveVirtualCall] At " << llvmIRToString(CallSite) - // << '\n'; - - // TODO: Use getVFTIndexAndVT(), once #785 is merged auto RetrievedVtableIndex = getVFTIndex(CallSite); if (!RetrievedVtableIndex.has_value()) { // An error occured @@ -84,10 +107,10 @@ void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, return; } - auto *VT = CallSite->getCalledOperand()->stripPointerCastsAndAliases(); + auto *CalledOp = CallSite->getCalledOperand()->stripPointerCastsAndAliases(); auto VtableIndex = RetrievedVtableIndex.value(); - auto BaseCallees = BaseCG->resolveIndirectCall(CallSite); + auto BaseCallees = 
BaseResolver->resolveIndirectCall(CallSite); auto ReceiverIdx = CallSite->hasStructRetAttr(); if (CallSite->arg_size() > ReceiverIdx) { @@ -103,10 +126,6 @@ void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, DITy, VtableIndex, CallSite, ReceiverType)) { if (psr::isConsistentCall(CallSite, Fun) && (BaseCallees.empty() || BaseCallees.contains(Fun))) { - // llvm::errs() << " Add possible target " << Fun->getName() - // << " through vtable lookup at index " << - // VtableIndex - // << " on type " << llvmTypeToString(DITy) << '\n'; PossibleTargets.insert(Fun); } } @@ -115,7 +134,7 @@ void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, } } - auto TNId = Nodes.getOrNull({vta::Variable{VT}}); + auto TNId = Nodes.getOrNull({vta::Variable{CalledOp}}); if (TNId) { auto SCC = SCCs.SCCOfNode[*TNId]; const auto &Types = TA.TypesPerSCC[SCC]; @@ -123,8 +142,6 @@ void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, if (const auto *Fun = Ty.dyn_cast()) { if (psr::isConsistentCall(CallSite, Fun) && (BaseCallees.empty() || BaseCallees.contains(Fun))) { - // llvm::errs() << " Add possible target " << Fun->getName() - // << " through direct function pointer\n"; PossibleTargets.insert(Fun); } } @@ -138,10 +155,7 @@ void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, void VTAResolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, const llvm::CallBase *CallSite) { - // llvm::errs() << "[resolveFunctionPointer] At " << llvmIRToString(CallSite) - // << '\n'; - - auto BaseCallees = BaseCG->resolveIndirectCall(CallSite); + auto BaseCallees = BaseResolver->resolveIndirectCall(CallSite); auto TNId = Nodes.getOrNull({vta::Variable{ CallSite->getCalledOperand()->stripPointerCastsAndAliases()}}); @@ -152,8 +166,6 @@ void VTAResolver::resolveFunctionPointer(FunctionSetTy &PossibleTargets, if (const auto *Fun = Ty.dyn_cast()) { if (psr::isConsistentCall(CallSite, Fun) && (BaseCallees.empty() || BaseCallees.contains(Fun))) { 
- // llvm::errs() << " Add possible target " << Fun->getName() - // << " through direct function pointer\n"; PossibleTargets.insert(Fun); } } diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index 933297ef65..9ca875bd37 100644 --- a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -87,7 +87,7 @@ getPointerIndicesOfType(llvm::DICompositeType *Ty, const llvm::DataLayout &DL) { auto PointerSize = DL.getPointerSizeInBits(); - // TODO: Does every type provide a meaningful getSizeInBits? + // XXX: Does every type provide a meaningful getSizeInBits? auto MaxNumPointers = Ty->getSizeInBits() / PointerSize; if (!MaxNumPointers) { return Ret; @@ -230,7 +230,7 @@ static void initializeWithFun(const llvm::Function *Fun, for (const auto &I : llvm::instructions(Fun)) { if (!I.getType()->isPointerTy()) { - // TODO: What about SSA structs that contain pointers? + // XXX: What about SSA structs that contain pointers? continue; } @@ -304,7 +304,6 @@ static void handleGEP(const llvm::GetElementPtrInst *GEP, return; } - // TODO: Is this correct? -- also check load auto From = getGEPNode(GEP, TAG, DL); if (From) { @@ -339,7 +338,7 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, } AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { - // TODO: Fuse store and GEP! + // XXX: Fuse store and GEP! auto DestNodeId = TAG.get({Variable{Dest}}); if (!DestNodeId) { @@ -380,7 +379,7 @@ static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, } AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { - // TODO: Fuse store and GEP! + // XXX: Fuse store and GEP! 
auto DestNodeId = TAG.get({Variable{Dest}}); if (!DestNodeId) { @@ -467,7 +466,7 @@ static void handleCall(const llvm::CallBase *Call, TypeAssignmentGraph &TAG, auto CSNod = TAG.get({Variable{Call}}); - // TODO: Handle struct returns that contain pointers + // XXX: Handle struct returns that contain pointers if (!HasArgNode && !CSNod) { return; } @@ -587,7 +586,7 @@ static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, handleReturn(Ret, TAG); return; } - // TODO: Handle more cases + // XXX: Handle more cases } static void buildTAGWithFun(const llvm::Function *Fun, TypeAssignmentGraph &TAG, diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index adb22604db..a3e2a22814 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -81,7 +81,7 @@ static void computeSCCsAndCompare(ExampleGraph &Graph, llvm::ArrayRef> ExpectedSCCs) { auto OutputRec = computeSCCs(Graph); - auto OutputIt = computeSCCIterative(Graph); + auto OutputIt = computeSCCsIterative(Graph); ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) << "Iterative Approach did not reach all nodes\n"; ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) From a5d334c2881f4b1aa1a3eef9d6518da09bbcc220 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 14 Sep 2025 17:37:37 +0200 Subject: [PATCH 21/27] Adapt VTACallGraphTest to TestingSrcLocation + measure timing in call-graph tool + some cleanup --- .../ControlFlow/Resolver/VTAResolver.h | 2 +- include/phasar/Utils.h | 2 + include/phasar/Utils/ChronoUtils.h | 1 - include/phasar/Utils/Timer.h | 31 +++- include/phasar/Utils/TypedVector.h | 9 ++ lib/PhasarLLVM/ControlFlow/ControlFlow.cppm | 1 + .../ControlFlow/Resolver/Resolver.cpp | 9 -- lib/Utils/ChronoUtils.cpp | 2 + lib/Utils/Utils.cppm | 11 ++ tools/call-graph/call-graph.cpp | 17 ++- .../ControlFlow/VTACallGraphTest.cpp | 138 +++++++++++------- 11 files changed, 152 insertions(+), 71 deletions(-) diff --git 
a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index 85bdb5f021..1a310dcf1c 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -21,7 +21,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" namespace psr { -///\brief A Resolver that uses a variant of the Variable Type Analysis to +/// \brief A Resolver that uses a variant of the Variable Type Analysis to /// resolver indirect calls. /// /// Uses debug-information to achieve better results with C++ virtual calls. diff --git a/include/phasar/Utils.h b/include/phasar/Utils.h index c754608a0a..a034f6c871 100644 --- a/include/phasar/Utils.h +++ b/include/phasar/Utils.h @@ -15,6 +15,7 @@ #include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/BoxedPointer.h" #include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Compressor.h" #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/DebugOutput.h" #include "phasar/Utils/EnumFlags.h" @@ -34,6 +35,7 @@ #include "phasar/Utils/StableVector.h" #include "phasar/Utils/Table.h" #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" #endif // PHASAR_UTILS_H diff --git a/include/phasar/Utils/ChronoUtils.h b/include/phasar/Utils/ChronoUtils.h index ca4f4092fd..e4ebc852e2 100644 --- a/include/phasar/Utils/ChronoUtils.h +++ b/include/phasar/Utils/ChronoUtils.h @@ -10,7 +10,6 @@ #ifndef PHASAR_UTILS_CHRONOUTILS_H #define PHASAR_UTILS_CHRONOUTILS_H -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/include/phasar/Utils/Timer.h b/include/phasar/Utils/Timer.h index f485aa0cba..bbf692c209 100644 --- a/include/phasar/Utils/Timer.h +++ b/include/phasar/Utils/Timer.h @@ -10,17 +10,38 @@ #ifndef PHASAR_UTILS_TIMER_H #define PHASAR_UTILS_TIMER_H +#include "phasar/Utils/ChronoUtils.h" + #include "llvm/ADT/FunctionExtras.h" 
#include namespace psr { -class Timer { + +class SimpleTimer { +public: + SimpleTimer() noexcept : Start(std::chrono::steady_clock::now()) {} + + [[nodiscard]] hms elapsed() const noexcept { + auto End = std::chrono::steady_clock::now(); + return {End - Start}; + } + [[nodiscard]] std::chrono::nanoseconds elapsedNanos() const noexcept { + auto End = std::chrono::steady_clock::now(); + return End - Start; + } + + void restart() noexcept { Start = std::chrono::steady_clock::now(); } + +private: + std::chrono::steady_clock::time_point Start; +}; + +class Timer : public SimpleTimer { public: Timer(llvm::unique_function WithElapsed) noexcept - : WithElapsed(std::move(WithElapsed)), - Start(std::chrono::steady_clock::now()) {} + : WithElapsed(std::move(WithElapsed)) {} Timer(Timer &&) noexcept = default; Timer &operator=(Timer &&) noexcept = default; @@ -29,14 +50,12 @@ class Timer { ~Timer() { if (WithElapsed) { - auto End = std::chrono::steady_clock::now(); - WithElapsed(End - Start); + WithElapsed(elapsedNanos()); } } private: llvm::unique_function WithElapsed; - std::chrono::steady_clock::time_point Start; }; } // namespace psr diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h index 46a56c9b29..547eb0434f 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -23,6 +23,15 @@ #include namespace psr { + +/// Wraps a llvm::SmallVector, allowing index-based access by IdT, instead of +/// size_t. +/// +/// \tparam IdT The index-type that should be used for operator[]. Must be +/// losslessly convertible from and to size_t. +/// \tparam ValueT The usual value_type of SmallVector. 
+/// \tparam SmallSize The size of the inline-storage of SmallVector (default: +/// 0) template class TypedVector { public: diff --git a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm index 0b5be19763..629bd45daf 100644 --- a/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm +++ b/lib/PhasarLLVM/ControlFlow/ControlFlow.cppm @@ -20,6 +20,7 @@ export namespace psr { using psr::buildLLVMBasedCallGraph; using psr::CFGTraits; using psr::CHAResolver; +using psr::getDefaultEntryPoints; using psr::getEntryFunctions; using psr::getEntryFunctionsMut; using psr::getNonPureVirtualVFTEntry; diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index ad44f3899d..07d77f33f0 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -162,22 +162,13 @@ bool psr::isVirtualCall(const llvm::Instruction *Inst, // check potential receiver type const auto *RecType = getReceiverType(CallSite); if (!RecType) { - // llvm::errs() << "No receiver type found for call at " - // << llvmIRToString(Inst) << '\n'; return false; } if (!VTP.hasVFTable(RecType)) { - // llvm::errs() << "Receiver type has no vtable: " << - // llvmTypeToString(RecType) - // << " for call at " << llvmIRToString(Inst) << '\n'; return false; } auto Idx = getVFTIndex(CallSite); - // llvm::errs() << "Retrieved Vtable index is: " << Idx << " for receiver-type - // " - // << llvmTypeToString(RecType) << " for call at " - // << llvmIRToString(Inst) << '\n'; return Idx >= 0; } diff --git a/lib/Utils/ChronoUtils.cpp b/lib/Utils/ChronoUtils.cpp index 6540baf96c..05f23edeaf 100644 --- a/lib/Utils/ChronoUtils.cpp +++ b/lib/Utils/ChronoUtils.cpp @@ -1,5 +1,7 @@ #include "phasar/Utils/ChronoUtils.h" +#include "llvm/Support/Format.h" + llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, const hms &HMS) { return OS << llvm::format("%.2ld:%.2ld:%.2ld:%.6ld", HMS.Hours.count(),
HMS.Minutes.count(), HMS.Seconds.count(), diff --git a/lib/Utils/Utils.cppm b/lib/Utils/Utils.cppm index 7f42cf169f..7273ccedc5 100644 --- a/lib/Utils/Utils.cppm +++ b/lib/Utils/Utils.cppm @@ -1,13 +1,16 @@ module; #include "phasar/Utils/AdjacencyList.h" +#include "phasar/Utils/AlignNum.h" #include "phasar/Utils/AnalysisPrinterBase.h" #include "phasar/Utils/AnalysisProperties.h" #include "phasar/Utils/Average.h" +#include "phasar/Utils/BitSet.h" #include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/BoxedPointer.h" #include "phasar/Utils/ByRef.h" #include "phasar/Utils/ChronoUtils.h" +#include "phasar/Utils/Compressor.h" #include "phasar/Utils/DFAMinimizer.h" #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/DebugOutput.h" @@ -40,18 +43,22 @@ module; #include "phasar/Utils/TableWrappers.h" #include "phasar/Utils/Timer.h" #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" export module phasar.utils; export namespace psr { using psr::AdjacencyList; +using psr::AlignNum; +using psr::AlignStr; using psr::AnalysisPrinterBase; using psr::AnalysisProperties; using psr::GraphTraits; using psr::to_string; using psr::operator<<; using psr::AnalysisPropertiesMixin; +using psr::BitSet; using psr::BitVectorSet; using psr::BoxedConstPtr; using psr::BoxedPtr; @@ -146,6 +153,7 @@ using psr::adl_to_string; using psr::AreEqualityComparable; using psr::assertAllNotNull; using psr::assertNotNull; +using psr::Compressor; using psr::computePowerSet; using psr::computeSCCDependencies; using psr::computeSCCIterative; @@ -201,13 +209,16 @@ using psr::SCCHolder; using psr::SCCId; using psr::SCCOrder; using psr::scope_exit; +using psr::SimpleTimer; using psr::SmallDenseTable1d; using psr::StableVector; using psr::StringIDLess; using psr::Table; using psr::Timer; using psr::TrueFn; +using psr::TypedVector; using psr::UnorderedSet; using psr::UnorderedTable1d; using psr::variant_idx; + } // namespace psr diff --git 
a/tools/call-graph/call-graph.cpp b/tools/call-graph/call-graph.cpp index a3417f3fc9..732daec2a2 100644 --- a/tools/call-graph/call-graph.cpp +++ b/tools/call-graph/call-graph.cpp @@ -22,6 +22,7 @@ #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" #include "phasar/Pointer/AliasAnalysisType.h" #include "phasar/Utils/AlignNum.h" +#include "phasar/Utils/Timer.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -88,6 +89,12 @@ static cl::opt IRFile(cl::Positional, cl::Required, cl::desc("The LLVM IR file to analyze"), cl::cat(CGCat)); +struct DiagTimer : psr::SimpleTimer { // NOLINT + DiagTimer(llvm::StringRef Msg) noexcept : Message(Msg) {} + ~DiagTimer() { llvm::errs() << Message << " (" << elapsed() << ")\n"; } + + llvm::StringRef Message; +}; static void computeCGStats(const psr::LLVMBasedCallGraph &CG, llvm::raw_ostream &OS); @@ -95,10 +102,13 @@ int main(int Argc, char *Argv[]) { cl::HideUnrelatedOptions(CGCat); cl::ParseCommandLineOptions(Argc, Argv); + psr::SimpleTimer LoadingTm; auto IRDB = psr::LLVMProjectIRDB::loadOrExit(IRFile); auto VTP = psr::LLVMVFTableProvider(IRDB); auto TH = psr::DIBasedTypeHierarchy(IRDB); auto EntryPoints = psr::getDefaultEntryPoints(IRDB); + llvm::errs() << "Loaded IR and computed helpers (" << LoadingTm.elapsed() + << ")\n"; if (BuildBaseCG && CGType != psr::CallGraphAnalysisType::VTA) { llvm::WithColor::warning() << "The option --build-base-cg only works for " @@ -107,6 +117,8 @@ int main(int Argc, char *Argv[]) { } auto CG = [&] { + DiagTimer Tm{"Created resolver"}; + switch (CGType) { case psr::CallGraphAnalysisType::NORESOLVE: case psr::CallGraphAnalysisType::CHA: @@ -151,7 +163,10 @@ int main(int Argc, char *Argv[]) { return *OS; }; - auto ICF = psr::LLVMBasedICFG(std::move(CG), &IRDB); + auto ICF = [&] { + DiagTimer Tm{"Built call-graph"}; + return psr::LLVMBasedICFG(std::move(CG), &IRDB); + }(); if (EmitCGAsDot) { ICF.print(GetOS()); diff --git 
a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp index c88e1a643c..8927938429 100644 --- a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp @@ -22,8 +22,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/Instruction.h" #include "llvm/Support/raw_ostream.h" +#include "SrcCodeLocationEntry.h" #include "TestConfig.h" #include "gtest/gtest.h" @@ -71,13 +73,15 @@ psr::LLVMBasedCallGraph computeVTACallGraph( return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB)); } -////////////////////////////// +using psr::unittest::LineColFunOp; +using psr::unittest::TestingSrcLocation; + class VTACallGraphTest : public ::testing::Test { protected: static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER(""); struct GroundTruthEntry { - size_t CSId; + TestingSrcLocation CSId; std::set Callees; }; @@ -97,7 +101,8 @@ class VTACallGraphTest : public ::testing::Test { auto CG = computeVTACallGraph(IRDB, VTP, &AS, BaseCG); for (const auto &Entry : GT) { - const auto *CS = IRDB.getInstruction(Entry.CSId); + const auto *CS = llvm::cast( + psr::unittest::testingLocInIR(Entry.CSId, IRDB)); ASSERT_NE(nullptr, CS); ASSERT_TRUE(llvm::isa(CS)) << "CS " << psr::llvmIRToString(CS) << " is no call-site!"; @@ -122,17 +127,22 @@ class VTACallGraphTest : public ::testing::Test { }; TEST_F(VTACallGraphTest, VirtualCallSite_InterProcCallSite) { - doAnalysisAndCompareResults("virtual_callsites/interproc_callsite_cpp_dbg.ll", - { - {17, {"_ZN7Derived3barEv"}}, - }); + doAnalysisAndCompareResults( + "virtual_callsites/interproc_callsite_cpp_dbg.ll", + { + {LineColFunOp{11, 40, "_Z12callFunctionR4Base", + llvm::Instruction::Call}, + {"_ZN7Derived3barEv"}}, + }); } TEST_F(VTACallGraphTest, UninitializedVariables_VirtualCall) { - 
doAnalysisAndCompareResults("uninitialized_variables/virtual_call_cpp_dbg.ll", - { - {34, {"_Z3barRi", "_Z3fooRi"}}, - }); + doAnalysisAndCompareResults( + "uninitialized_variables/virtual_call_cpp_dbg.ll", + { + {LineColFunOp{16, 11, "main", llvm::Instruction::Call}, + {"_Z3barRi", "_Z3fooRi"}}, + }); } TEST_F(VTACallGraphTest, PathTracing_Inter12) { @@ -140,67 +150,85 @@ TEST_F(VTACallGraphTest, PathTracing_Inter12) { doAnalysisAndCompareResults( "path_tracing/inter_12_cpp_dbg.ll", { - {30, {"_ZN3TwoD0Ev", "_ZN5ThreeD0Ev"}}, - {39, {"_ZN5Three11assignValueEi", "_ZN3Two11assignValueEi"}}, + {LineColFunOp{16, 3, "main", llvm::Instruction::Call}, + {"_ZN3TwoD0Ev", "_ZN5ThreeD0Ev"}}, + {LineColFunOp{19, 13, "main", llvm::Instruction::Call}, + {"_ZN5Three11assignValueEi", "_ZN3Two11assignValueEi"}}, }); } TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer1) { - doAnalysisAndCompareResults("call_graphs/function_pointer_1_c.ll", - { - {5, {"bar"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/function_pointer_1_c_dbg.ll", + { + {LineColFunOp{9, 27, "main", llvm::Instruction::Call}, {"bar"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer2) { - doAnalysisAndCompareResults("call_graphs/function_pointer_2_cpp.ll", - { - {8, {"_Z3barv"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/function_pointer_2_cpp_dbg.ll", + { + {LineColFunOp{8, 16, "main", llvm::Instruction::Call}, {"_Z3barv"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_FunctionPointer3) { // Note: Although bar is assigned (and part of the TAG), is does not qualify // as psr::isConsistentCall() - doAnalysisAndCompareResults("call_graphs/function_pointer_3_cpp.ll", - { - {11, {/*"_Z3bari",*/ "_Z3foov"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/function_pointer_3_cpp_dbg.ll", + { + {LineColFunOp{10, 16, "main", llvm::Instruction::Call}, + {/*"_Z3bari",*/ "_Z3foov"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall2) { - 
doAnalysisAndCompareResults("call_graphs/virtual_call_2_cpp_dbg.ll", - { - {20, {"_ZN1B3fooEv"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/virtual_call_2_cpp_dbg.ll", + { + {LineColFunOp{15, 8, "main", llvm::Instruction::Invoke}, + {"_ZN1B3fooEv"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall3) { // Use the dbg version, because VTA relies on !heapallocsite metadata - doAnalysisAndCompareResults("call_graphs/virtual_call_3_cpp_dbg.ll", - { - {19, {"_ZN5AImpl3fooEv"}}, - {26, {"_ZN5AImplD0Ev"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/virtual_call_3_cpp_dbg.ll", + { + {LineColFunOp{14, 0, "main", llvm::Instruction::Call}, + {"_ZN5AImpl3fooEv"}}, + {LineColFunOp{15, 3, "main", llvm::Instruction::Call}, + {"_ZN5AImplD0Ev"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall4) { - doAnalysisAndCompareResults("call_graphs/virtual_call_4_cpp_dbg.ll", - { - {20, {"_ZN1B3fooEv"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/virtual_call_4_cpp_dbg.ll", + { + {LineColFunOp{15, 0, "main", llvm::Instruction::Invoke}, + {"_ZN1B3fooEv"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall5) { // Use the dbg version, because VTA relies on !heapallocsite metadata - doAnalysisAndCompareResults("call_graphs/virtual_call_5_cpp_dbg.ll", - { - {21, {"_ZN1B5VfuncEv"}}, - {28, {"_ZN1BD0Ev"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/virtual_call_5_cpp_dbg.ll", + { + {LineColFunOp{20, 6, "main", llvm::Instruction::Call}, + {"_ZN1B5VfuncEv"}}, + {LineColFunOp{22, 3, "main", llvm::Instruction::Call}, {"_ZN1BD0Ev"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall7) { // Use the dbg version, because VTA relies on !heapallocsite metadata - doAnalysisAndCompareResults("call_graphs/virtual_call_7_cpp_dbg.ll", - { - {24, {"_ZN1A5VfuncEv"}}, - {29, {"_ZN1B5VfuncEv"}}, - {36, {"_ZN1AD0Ev"}}, - }); + doAnalysisAndCompareResults( + "call_graphs/virtual_call_7_cpp_dbg.ll", + { + {LineColFunOp{19, 6, "main", 
llvm::Instruction::Call}, + {"_ZN1A5VfuncEv"}}, + {LineColFunOp{20, 6, "main", llvm::Instruction::Call}, + {"_ZN1B5VfuncEv"}}, + {LineColFunOp{22, 3, "main", llvm::Instruction::Call}, {"_ZN1AD0Ev"}}, + }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall8) { @@ -209,8 +237,10 @@ TEST_F(VTACallGraphTest, CallGraphs_VirtualCall8) { doAnalysisAndCompareResults( "call_graphs/virtual_call_8_cpp_dbg.ll", { - {22, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, - {27, {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + {LineColFunOp{32, 6, "main", llvm::Instruction::Call}, + {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, + {LineColFunOp{33, 6, "main", llvm::Instruction::Call}, + {"_ZZ4mainEN1B3fooEv", "_ZZ4mainEN1C3fooEv"}}, }); } TEST_F(VTACallGraphTest, CallGraphs_VirtualCall9) { @@ -219,8 +249,10 @@ TEST_F(VTACallGraphTest, CallGraphs_VirtualCall9) { doAnalysisAndCompareResults( "call_graphs/virtual_call_9_cpp_dbg.ll", { - {72, {"_ZN1B3fooEv", "_ZN1C3fooEv", "_ZN1D3fooEv"}}, - {79, {"_ZN1BD0Ev", "_ZN1CD0Ev", "_ZN1DD0Ev"}}, + {LineColFunOp{57, 6, "main", llvm::Instruction::Call}, + {"_ZN1B3fooEv", "_ZN1C3fooEv", "_ZN1D3fooEv"}}, + {LineColFunOp{58, 3, "main", llvm::Instruction::Call}, + {"_ZN1BD0Ev", "_ZN1CD0Ev", "_ZN1DD0Ev"}}, }); } // TODO: More tests! 
From c9c1a4be52d884e469a381cb6930278566aabc61 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 14 Sep 2025 18:08:46 +0200 Subject: [PATCH 22/27] Fix AdjacencyList with TypedVector --- include/phasar/Utils/AdjacencyList.h | 6 ++---- include/phasar/Utils/TypedVector.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/phasar/Utils/AdjacencyList.h b/include/phasar/Utils/AdjacencyList.h index 5f9f3616a9..c8e22bb087 100644 --- a/include/phasar/Utils/AdjacencyList.h +++ b/include/phasar/Utils/AdjacencyList.h @@ -154,16 +154,14 @@ struct GraphTraits> { /// Gets a const range of all nodes in graph G template >> - static constexpr llvm::ArrayRef - nodes(const graph_type &G) noexcept { + static constexpr const auto &nodes(const graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } /// Gets a mutable range of all nodes in graph G template >> - static constexpr llvm::MutableArrayRef - nodes(graph_type &G) noexcept { + static constexpr auto &nodes(graph_type &G) noexcept { assert(G.Adj.size() == G.Nodes.size()); return G.Nodes; } diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h index 547eb0434f..e72ec141ed 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -93,6 +93,9 @@ class TypedVector { Vec.push_back(std::move(Val)); } + void pop_back() { Vec.pop_back(); } + [[nodiscard]] ValueT pop_back_val() { return Vec.pop_back_val(); } + [[nodiscard]] bool operator==(const TypedVector &Other) const noexcept { return Vec == Other.Vec; } From a0be56f3f016e91e346dd6e97f86b93c4d1d3f2d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 14 Sep 2025 18:28:00 +0200 Subject: [PATCH 23/27] Fix stack-use-after-scope in TypedVector::operator[], materialized in minimizeGraph() --- include/phasar/Utils/TypedVector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/phasar/Utils/TypedVector.h 
b/include/phasar/Utils/TypedVector.h index e72ec141ed..70010368ed 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -38,7 +38,7 @@ class TypedVector { TypedVector() noexcept = default; TypedVector(std::initializer_list IList) : Vec(IList) {} TypedVector(size_t Size) : Vec(Size) {} - TypedVector(size_t Size, ValueT Default) : Vec(Size, Default) {}; + TypedVector(size_t Size, ByConstRef Default) : Vec(Size, Default) {}; template explicit TypedVector(Iter From, Iter To) @@ -61,7 +61,7 @@ class TypedVector { return size_t(Id) < size(); } - [[nodiscard]] ByConstRef operator[](IdT Id) const & { + [[nodiscard]] const ValueT &operator[](IdT Id) const & { assert(inbounds(Id)); return Vec[size_t(Id)]; } From fe3307406d93e4b6837e7cd2300057f59783e89e Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 23 Sep 2025 12:06:46 +0200 Subject: [PATCH 24/27] some cleanup --- BreakingChanges.md | 4 +++- .../ControlFlow/Resolver/Resolver.h | 4 +--- .../ControlFlow/Resolver/VTAResolver.h | 5 ++++- .../ControlFlow/VTA/TypeAssignmentGraph.h | 9 +++++--- include/phasar/Utils/Compressor.h | 21 +++++++++++++------ include/phasar/Utils/SCCGeneric.h | 11 +++++++++- include/phasar/Utils/TypeTraits.h | 3 +++ include/phasar/Utils/TypedVector.h | 3 ++- tools/call-graph/call-graph.cpp | 2 +- .../ControlFlow/VTACallGraphTest.cpp | 6 ++---- 10 files changed, 47 insertions(+), 21 deletions(-) diff --git a/BreakingChanges.md b/BreakingChanges.md index 8c17daa8ae..50cfe65bd7 100644 --- a/BreakingChanges.md +++ b/BreakingChanges.md @@ -2,7 +2,9 @@ ## development HEAD -*None* +- The `AdjacencyList` struct now has one more template argument to denote the integer-like `vertex_t` type. It is the second template argument (which previously was the EdgeType). The edge-type is now denoted by the *third* template argument. 
+- The `AdjacencyList` switches from using `llvm::NoneType` as empty-node marker to `psr::EmptyType` for forward-compatibility with LLVM-16 that removes `llvm::NoneType`. + ## v2503 diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index b3b4366d86..408c9f3204 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -17,12 +17,10 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_RESOLVER_H_ -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DerivedTypes.h" @@ -134,7 +132,7 @@ class Resolver { /// \param Ty Determines the Resolver subclass to instantiate /// \param IRDB The IR code where the Resolver should be based on. Must not be /// nullptr. - /// \param VTP A virtual-table-provides that is used to extract C++-VTables + /// \param VTP A virtual-table-provider that is used to extract C++-VTables /// from the IR. Must not be nullptr. /// \param TH The type-hierarchy implementation to use. 
Must be non-null, if /// the selected call-graph analysis requires type-hierarchy information; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index 1a310dcf1c..4146974f41 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -10,9 +10,9 @@ #ifndef PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H #define PHASAR_PHASARLLVM_CONTROLFLOW_RESOLVER_VTARESOLVER_H +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/ControlFlow/VTA/TypePropagator.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/MaybeUniquePtr.h" @@ -21,6 +21,9 @@ #include "llvm/ADT/STLFunctionalExtras.h" namespace psr { + +class LLVMProjectIRDB; + /// \brief A Resolver that uses a variant of the Variable Type Analysis to /// resolver indirect calls. 
/// diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index 9a90188f53..cdd07b3b41 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -23,9 +23,6 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -37,6 +34,12 @@ class LLVMProjectIRDB; class LLVMVFTableProvider; } // namespace psr +namespace llvm { +class DIType; +class Value; +class Function; +} // namespace llvm + namespace psr::vta { enum class TAGNodeId : uint32_t {}; diff --git a/include/phasar/Utils/Compressor.h b/include/phasar/Utils/Compressor.h index 8a99eef817..883e214fd5 100644 --- a/include/phasar/Utils/Compressor.h +++ b/include/phasar/Utils/Compressor.h @@ -12,10 +12,10 @@ #include "phasar/Utils/ByRef.h" #include "phasar/Utils/TypeTraits.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/SmallVector.h" #include #include @@ -71,12 +71,12 @@ class Compressor>> { } [[nodiscard]] bool inbounds(IdT Idx) const noexcept { - return size_t(Idx) < FromInt.size(); + return FromInt.inbounds(Idx); } [[nodiscard]] T operator[](IdT Idx) const noexcept { assert(inbounds(Idx)); - return FromInt[size_t(Idx)]; + return FromInt[Idx]; } [[nodiscard]] size_t size() const noexcept { return FromInt.size(); } @@ -88,6 +88,8 @@ class Compressor>> { [[nodiscard]] auto begin() const noexcept { return FromInt.begin(); } [[nodiscard]] auto end() const noexcept { return FromInt.end(); } + [[nodiscard]] auto enumerate() const noexcept { return FromInt.enumerate(); } + void clear() noexcept { ToInt.clear(); FromInt.clear(); @@ -95,14 +97,13 @@ class Compressor>> { 
private: llvm::DenseMap ToInt; - llvm::SmallVector FromInt; + TypedVector FromInt; }; /// \brief A utility class that assigns a sequential Id to every inserted /// object. /// -/// This specialization handles types that cannot be efficiently passed by -/// value +/// This specialization handles types that cannot be efficiently passed by value template class Compressor>> { public: @@ -190,6 +191,14 @@ class Compressor>> { auto begin() const noexcept { return FromInt.begin(); } auto end() const noexcept { return FromInt.end(); } + [[nodiscard]] auto enumerate() const noexcept { + return llvm::map_range(llvm::enumerate(FromInt), + [](const auto &IndexAndVal) { + return std::pair{ + IdT(IndexAndVal.index()), IndexAndVal.value()}; + }); + } + void clear() noexcept { ToInt.clear(); FromInt.clear(); diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 1443adfbf4..24caa34626 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -88,7 +88,7 @@ namespace psr { /// \brief Holds the SCCs of a given graph. Each SCC is assigned a unique /// sequential id. template struct SCCHolder { - TypedVector, 0> SCCOfNode; + TypedVector> SCCOfNode; TypedVector, llvm::SmallVector> NodesInSCC{}; @@ -315,6 +315,9 @@ computeSCCsRec(const G &Graph, typename GraphTraits::vertex_t CurrNode, /// /// Uses Tarjan's algorithm (recursive) to compute the SCCs. template +#if __cplusplus >= 202002L + requires is_const_graph +#endif [[nodiscard]] SCCHolder::vertex_t> computeSCCs(const G &Graph) { using GTraits = psr::GraphTraits; @@ -344,6 +347,9 @@ computeSCCs(const G &Graph) { /// Uses a non-recursive variant of Tarjan's algorithm to compute the SCCs. /// \attention Largely generated by FhGenie GPT o3 Mini, so use with caution! 
template +#if __cplusplus >= 202002L + requires is_const_graph +#endif SCCHolder>::vertex_t> computeSCCsIterative(const G &Graph) { using GTraits = GraphTraits>; @@ -467,6 +473,9 @@ computeSCCsIterative(const G &Graph) { /// to single nodes. The resulting graph is always a DAG, i.e., it contains no /// cycles template +#if __cplusplus >= 202002L + requires is_const_graph +#endif SCCDependencyGraph::vertex_t> computeSCCDependencies( const G &Graph, const SCCHolder::vertex_t> &SCCs) { diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index e1bdad5bb4..f763507f2a 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -33,6 +33,9 @@ template struct type_identity { template using type_identity = std::type_identity; #endif +/// \file TODO: We should stick to one naming convention here and not mix +/// CamelCase with lower_case! + // NOLINTBEGIN(readability-identifier-naming) namespace detail { diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h index 70010368ed..86d6e3f1f2 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -30,7 +30,7 @@ namespace psr { /// \tparam IdT The index-type that should be used for operator[]. Must be /// losslessly convertible from and to size_t. /// \tparam ValueT The usual value_type of SmallVector. 
-/// \tparam SmallSize The size of the inline-storange of SmallVector (default: +/// \tparam SmallSize The size of the inline-storage of SmallVector (default: /// 0) template class TypedVector { @@ -56,6 +56,7 @@ class TypedVector { [[nodiscard]] bool empty() const noexcept { return Vec.empty(); } [[nodiscard]] bool any() const noexcept { return !Vec.empty(); } [[nodiscard]] size_t size() const noexcept { return Vec.size(); } + [[nodiscard]] size_t capacity() const noexcept { return Vec.capacity(); } [[nodiscard]] bool inbounds(IdT Id) const noexcept { return size_t(Id) < size(); diff --git a/tools/call-graph/call-graph.cpp b/tools/call-graph/call-graph.cpp index 732daec2a2..93bd1986af 100644 --- a/tools/call-graph/call-graph.cpp +++ b/tools/call-graph/call-graph.cpp @@ -86,7 +86,7 @@ static cl::opt cl::cat(CGCat)); static cl::opt IRFile(cl::Positional, cl::Required, - cl::desc("The LLVM IR file to analyze"), + cl::desc(""), cl::cat(CGCat)); struct DiagTimer : psr::SimpleTimer { // NOLINT diff --git a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp index 8927938429..977cc89f24 100644 --- a/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/VTACallGraphTest.cpp @@ -42,7 +42,7 @@ namespace { std::vector getEntryPoints(const psr::LLVMProjectIRDB &IRDB) { std::vector EntryPoints; - /////////////////////////////////// + if (IRDB.getFunctionDefinition("main")) { EntryPoints.emplace_back("main"); } else { @@ -54,14 +54,12 @@ std::vector getEntryPoints(const psr::LLVMProjectIRDB &IRDB) { } return EntryPoints; } -///////////////////////////// + psr::LLVMBasedCallGraph createBaseCG(psr::LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, const psr::DIBasedTypeHierarchy &TH, psr::LLVMAliasInfoRef /*PT*/) { psr::RTAResolver Res(&IRDB, &VTP, &TH); - - /////////////////////////////////// return psr::buildLLVMBasedCallGraph(IRDB, Res, getEntryPoints(IRDB), 
psr::Soundness::Soundy); } From 79b1870c294ba6f74631cf684ab494041568f3c0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 2 Oct 2025 20:47:37 +0200 Subject: [PATCH 25/27] Replace Tarjan's algorithm with Pearce's algorithm for computing SCCs. This lets us compute SCCs and topological sorting in a single pass over the graph and also gets rid of the recursion --- include/phasar/Utils/IotaIterator.h | 47 +- include/phasar/Utils/SCCGeneric.h | 514 +++++++++--------- include/phasar/Utils/TypeTraits.h | 9 - .../ControlFlow/Resolver/VTAResolver.cpp | 5 +- unittests/Utils/SCCGenericTest.cpp | 491 ++++++++++++++++- 5 files changed, 775 insertions(+), 291 deletions(-) diff --git a/include/phasar/Utils/IotaIterator.h b/include/phasar/Utils/IotaIterator.h index facd03efcc..c01805f688 100644 --- a/include/phasar/Utils/IotaIterator.h +++ b/include/phasar/Utils/IotaIterator.h @@ -12,6 +12,7 @@ #include "phasar/Utils/TypeTraits.h" +#include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include @@ -19,38 +20,48 @@ #include namespace psr { -/// An iterator that iterates over the same value a specified number of times -template class IotaIterator { +/// An iterator that iterates over a numeric range, where the start value is +/// always incremented by one. 
+template +class IotaIterator + : public llvm::iterator_facade_base, + std::random_access_iterator_tag, T, + ptrdiff_t, const T *, T> { + using base_t = llvm::iterator_facade_base, + std::random_access_iterator_tag, T, + ptrdiff_t, const T *, T>; + public: - using value_type = T; - using reference = T; - using pointer = const T *; - using difference_type = ptrdiff_t; - using iterator_category = std::forward_iterator_tag; + using typename base_t::difference_type; + using typename base_t::iterator_category; + using typename base_t::pointer; + using typename base_t::reference; + using typename base_t::value_type; constexpr reference operator*() const noexcept { return Elem; } constexpr pointer operator->() const noexcept { return &Elem; } - constexpr IotaIterator &operator++() noexcept { - if constexpr (is_incrementable) { - ++Elem; - } else { - Elem = T(size_t(Elem) + 1); - } + constexpr IotaIterator &operator+=(difference_type N) noexcept { + Elem = T(difference_type(Elem) + N); return *this; } - constexpr IotaIterator operator++(int) noexcept { - auto Ret = *this; - ++*this; - return Ret; + constexpr IotaIterator &operator-=(difference_type N) noexcept { + Elem = T(difference_type(Elem) - N); + return *this; + } + constexpr bool operator<(const IotaIterator &Other) const noexcept { + return difference_type(Other.Elem) < difference_type(Elem); } - constexpr bool operator==(const IotaIterator &Other) const noexcept { return Other.Elem == Elem; } constexpr bool operator!=(const IotaIterator &Other) const noexcept { return !(*this == Other); } + constexpr difference_type + operator-(const IotaIterator &Other) const noexcept { + return difference_type(Elem) - difference_type(Other.Elem); + } constexpr explicit IotaIterator(T Elem) noexcept : Elem(Elem) {} diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 24caa34626..1663089ae6 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -20,7 +20,9 @@ 
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -44,6 +46,10 @@ struct SCCIdBase { return Value; } + explicit constexpr operator ptrdiff_t() const noexcept { + return ptrdiff_t(Value); + } + constexpr uint32_t operator+() const noexcept { return Value; } friend constexpr bool operator==(SCCIdBase L, SCCIdBase R) noexcept { @@ -225,250 +231,6 @@ template struct SCCOrder { llvm::SmallVector, 0> SCCIds; }; -namespace detail { - -template struct SCCData { - TypedVector Disc; - TypedVector Low; - BitSet OnStack; - llvm::SmallVector Stack; - uint32_t Time = 0; - BitSet Seen; - - explicit SCCData(size_t NumFuns) - : Disc(NumFuns, UINT32_MAX), Low(NumFuns, UINT32_MAX), OnStack(NumFuns), - Seen(NumFuns) {} -}; - -template struct SCCDataIt : SCCData { - llvm::SmallVector> CallStack; - - using SCCData::SCCData; -}; - -template constexpr void setMin(T &InOut, T Other) noexcept { - if (Other < InOut) { - InOut = Other; - } -} - -template -static void -computeSCCsRec(const G &Graph, typename GraphTraits::vertex_t CurrNode, - SCCData::vertex_t> &Data, - SCCHolder::vertex_t> &Holder) { - // See - // https://www.geeksforgeeks.org/tarjan-algorithm-find-strongly-connected-components - - auto CurrTime = Data.Time++; - Data.Disc[CurrNode] = CurrTime; - Data.Low[CurrNode] = CurrTime; - Data.Stack.push_back(CurrNode); - Data.OnStack.insert(CurrNode); - - using GTraits = psr::GraphTraits; - using detail::setMin; - using SCCId = psr::SCCId::vertex_t>; - - for (const auto &OutEdge : GTraits::outEdges(Graph, CurrNode)) { - auto SuccNode = GTraits::target(OutEdge); - if (Data.Disc[SuccNode] == UINT32_MAX) { - // Tree-edge: Not seen yet --> recurse - - computeSCCsRec(Graph, SuccNode, Data, Holder); - setMin(Data.Low[CurrNode], Data.Low[SuccNode]); - } else if (Data.OnStack.contains(SuccNode)) { - // Back-edge --> circle! 
- - setMin(Data.Low[CurrNode], Data.Disc[SuccNode]); - } - } - - if (Data.Low[CurrNode] == Data.Disc[CurrNode]) { - // Found SCC - - auto SCCIdx = SCCId(Holder.NodesInSCC.size()); - auto &NodesInSCC = Holder.NodesInSCC.emplace_back(); - - assert(!Data.Stack.empty()); - - while (Data.Stack.back() != CurrNode) { - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[Fun] = SCCIdx; - Data.OnStack.erase(Fun); - Data.Seen.insert(Fun); - NodesInSCC.push_back(Fun); - } - - auto Fun = Data.Stack.pop_back_val(); - Holder.SCCOfNode[Fun] = SCCIdx; - Data.OnStack.erase(Fun); - Data.Seen.insert(Fun); - NodesInSCC.push_back(Fun); - } -} - -} // namespace detail - -/// \brief Computes the strongly-connected components (SCCs) of a given graph. -/// The graph should conform to the is_const_graph concept. -/// -/// Uses Tarjan's algorithm (recursive) to compute the SCCs. -template -#if __cplusplus >= 202002L - requires is_const_graph -#endif -[[nodiscard]] SCCHolder::vertex_t> -computeSCCs(const G &Graph) { - using GTraits = psr::GraphTraits; - - SCCHolder Ret{}; - - auto NumNodes = GTraits::size(Graph); - Ret.SCCOfNode.resize(NumNodes); - - if (!NumNodes) { - return Ret; - } - - detail::SCCData Data(NumNodes); - for (auto VtxId : GTraits::vertices(Graph)) { - if (!Data.Seen.contains(VtxId)) { - computeSCCsRec(Graph, VtxId, Data, Ret); - } - } - - return Ret; -} - -/// \brief Computes the strongly-connected components (SCCs) of a given graph. -/// The graph should conform to the is_const_graph concept. -/// -/// Uses a non-recursive variant of Tarjan's algorithm to compute the SCCs. -/// \attention Largely generated by FhGenie GPT o3 Mini, so use with caution! 
-template -#if __cplusplus >= 202002L - requires is_const_graph -#endif -SCCHolder>::vertex_t> -computeSCCsIterative(const G &Graph) { - using GTraits = GraphTraits>; - using VertexTy = typename GTraits::vertex_t; - using EdgeTy = typename GTraits::edge_t; - using SCCId = psr::SCCId; - const int UNVISITED = -1; - - // Number of nodes (vertices are assumed to be consecutive indices). - size_t NumNodes = GTraits::size(Graph); - - // discovery index. - TypedVector Dfn(NumNodes, UNVISITED); - - // smallest index reachable. - TypedVector Lowlink(NumNodes, 0); - - // marker for Tarjan's stack. - BitSet InStack(NumNodes, false); - - int CurrentIndex = 0; - - // Our final SCC holder. Pre-resize SCCOfNode to the number of nodes. - SCCHolder Holder; - Holder.SCCOfNode.resize(NumNodes); - - // Instead of storing a vector of out-edges, we store an iterator pair. - using OutEdgeRange = - decltype(GTraits::outEdges(Graph, std::declval())); - using OutEdgeIterator = decltype(std::begin(std::declval())); - - // DFS frame holding current vertex and its edge iterator range. - struct DFSFrame { - VertexTy V; - OutEdgeIterator It; - OutEdgeIterator ItEnd; - }; - llvm::SmallVector DfsStack; - // Tarjan's stack (vertices in the current DFS path). - llvm::SmallVector S; - - // Helper to push a new DFS frame. - const auto PushFrame = [&](VertexTy V) { - auto &&Range = GTraits::outEdges(Graph, V); - static_assert( - std::is_lvalue_reference_v || - std::is_trivially_destructible_v>, - "We assume that outEdges gives either a reference or a view into the " - "out-edges, but never an owning container by value. Otherwise, the " - "DFSFrame iterators may be dangling"); - - DfsStack.emplace_back(DFSFrame{ - V, - std::begin(Range), - std::end(Range), - }); - }; - - // Iterate over all vertices (assumed to be dense). 
- for (const auto &V : GTraits::vertices(Graph)) { - if (Dfn[V] != UNVISITED) { - continue; // already visited - } - - PushFrame(V); - Dfn[V] = CurrentIndex; - Lowlink[V] = CurrentIndex; - CurrentIndex++; - S.push_back(V); - InStack.insert(V); - - // DFS simulation using the explicit stack. - while (!DfsStack.empty()) { - DFSFrame &Frame = DfsStack.back(); - VertexTy U = Frame.V; - if (Frame.It != Frame.ItEnd) { - // Process the next outgoing edge. - const EdgeTy &Edge = *(Frame.It++); - VertexTy W = GTraits::target(Edge); - if (Dfn[W] == UNVISITED) { - // w is newly discovered. - PushFrame(W); - Dfn[W] = CurrentIndex; - Lowlink[W] = CurrentIndex; - CurrentIndex++; - S.push_back(W); - InStack.insert(W); - } else if (InStack.contains(W)) { - // w is in the current DFS path; update lowlink. - detail::setMin(Lowlink[U], Dfn[W]); - } - } else { - // Done exploring u. - if (Lowlink[U] == Dfn[U]) { - // u is the root of an SCC; pop from S until u is reached. - auto &Comp = Holder.NodesInSCC.emplace_back(); // The new SCC. - VertexTy W; - do { - W = S.back(); - S.pop_back(); - InStack.erase(W); - // Assign w the current SCC id. - Holder.SCCOfNode[W] = SCCId(Holder.size()); - Comp.push_back(W); - } while (W != U); - } - DfsStack.pop_back(); - if (!DfsStack.empty()) { - // After returning, update the parent's lowlink. - VertexTy Parent = DfsStack.back().V; - detail::setMin(Lowlink[Parent], Lowlink[U]); - } - } - } - } - - return Holder; -} - /// \brief Creates a graph based on the given input Graph, collapsing all SCCs /// to single nodes. The resulting graph is always a DAG, i.e., it contains no /// cycles @@ -543,6 +305,270 @@ computeSCCOrder(const SCCHolder &SCCs, return Ret; } + +namespace detail { +/// Data for Pearce's Algorithm. 
+template struct Pearce4Data { + TypedVector RIndex; // only per-vertex array + BitSet Root; // root[v] in Algorithm 4 + uint32_t Index = 1; // DFS counter + uint32_t C; // SCC id counter + llvm::SmallVector Stack; + + explicit Pearce4Data(size_t Num) + : RIndex(Num, 0), Root(Num), C(Num ? Num - 1 : 0) {} +}; + +// Recursive variant of Pearce's algorithm (based on Algo 3 in the paper) +template +static void +pearce4VisitRec(const G &Graph, typename GraphTraits::vertex_t V, + Pearce4Data::vertex_t> &Data, + SCCHolder::vertex_t> &Holder) { + using GTraits = psr::GraphTraits; + using Vertex = typename GTraits::vertex_t; + using SCCId = psr::SCCId; + + bool Root = true; + Data.RIndex[V] = Data.Index++; + + for (const auto &Edge : GTraits::outEdges(Graph, V)) { + auto W = GTraits::target(Edge); + if (Data.RIndex[W] == 0) { + pearce4VisitRec(Graph, W, Data, Holder); + } + if (Data.RIndex[W] < Data.RIndex[V]) { + Data.RIndex[V] = Data.RIndex[W]; + Root = false; + } + } + + if (Root) { + Data.Index--; + auto NewSCC = SCCId(Holder.NodesInSCC.size()); + auto &Nodes = Holder.NodesInSCC.emplace_back(); + + while (!Data.Stack.empty() && + Data.RIndex[V] <= Data.RIndex[Data.Stack.back()]) { + auto W = Data.Stack.pop_back_val(); + Data.RIndex[W] = Data.C; + Data.Index--; + + Holder.SCCOfNode[W] = NewSCC; + Nodes.push_back(W); + } + Nodes.push_back(V); + Holder.SCCOfNode[V] = NewSCC; + Data.RIndex[V] = Data.C; + Data.C--; + } else { + Data.Stack.push_back(V); + } +} + +// Iterative variant of Pearce's algorithm (adapted from Algo 4 in the paper) +template +static void +pearce4VisitIt(const G &Graph, typename GraphTraits::vertex_t Start, + Pearce4Data::vertex_t> &Data, + SCCHolder::vertex_t> &Holder) { + using GTraits = psr::GraphTraits; + using Vertex = typename GTraits::vertex_t; + using SCCId = psr::SCCId; + + using OutEdgeRange = + decltype(GTraits::outEdges(Graph, std::declval())); + using OutEdgeIterator = decltype(std::begin(std::declval())); + using OutEdgeSentinel = 
decltype(std::end(std::declval())); + + struct DfsFrame { + Vertex CurrVtx; + OutEdgeIterator It; + [[no_unique_address]] OutEdgeSentinel End; + }; + + llvm::SmallVector CallStack; + + const auto PushFrames = [&](Vertex V, DfsFrame *Frame) { + if (Frame->It == Frame->End) { + return false; + } + // Recurse into children until reaching the bottom + do { + auto W = GTraits::target(*Frame->It); + + if (Data.RIndex[W] != 0) { + // Already pushed the children of W + break; + } + + Data.RIndex[W] = Data.Index++; + Data.Root.insert(W); + + auto &&OutEdges = GTraits::outEdges(Graph, W); + Frame = &CallStack.emplace_back( + DfsFrame{W, std::begin(OutEdges), std::end(OutEdges)}); + V = W; + + } while (Frame->It != Frame->End); + + return true; + }; + + const auto VisitLoop = [&](Vertex V, DfsFrame &Frame) { + // Finish visiting the current child and advance to the next child + if (Frame.It != Frame.End) { + auto W = GTraits::target(*Frame.It); + if (Data.RIndex[W] < Data.RIndex[V]) { + Data.RIndex[V] = Data.RIndex[W]; + Data.Root.erase(V); + } + + ++Frame.It; + } + }; + + const auto FinishFrame = [&](Vertex V) { + // finish visiting V and backtrack to the parent + + if (Data.Root.contains(V)) { + // Found a SCC + + Data.Index--; + auto NewSCC = SCCId(Holder.NodesInSCC.size()); + auto &Nodes = Holder.NodesInSCC.emplace_back(); + while (!Data.Stack.empty() && + Data.RIndex[V] <= Data.RIndex[Data.Stack.back()]) { + auto W = Data.Stack.pop_back_val(); + Data.RIndex[W] = Data.C; + Data.Index--; + + Holder.SCCOfNode[W] = NewSCC; + Nodes.push_back(W); + } + Nodes.push_back(V); + Holder.SCCOfNode[V] = NewSCC; + Data.RIndex[V] = Data.C; + Data.C--; + } else { + Data.Stack.push_back(V); + } + + CallStack.pop_back(); + }; + + // Initialize the callstack by pushing the initial frame + Data.RIndex[Start] = Data.Index++; + Data.Root.insert(Start); + { + auto &&OutEdges = GTraits::outEdges(Graph, Start); + static_assert( + std::is_lvalue_reference_v || + std::is_trivially_destructible_v>, 
+ "We assume that outEdges gives either a reference or a view into " + "the out-edges, but never an owning container by value. Otherwise, " + "the DFSFrame iterators may be dangling"); + CallStack.emplace_back( + DfsFrame{Start, std::begin(OutEdges), std::end(OutEdges)}); + } + + // Simulate the recursion + + PushFrames(Start, &CallStack.back()); + while (true) { + auto &Frame = CallStack.back(); + Vertex V = Frame.CurrVtx; + VisitLoop(V, Frame); + if (PushFrames(V, &Frame)) { + continue; // we don't pop from the callstack here + } + + FinishFrame(V); + if (CallStack.empty()) { + break; + } + } + + if (!Data.Stack.empty()) { + auto NewSCC = SCCId(Holder.NodesInSCC.size()); + auto &Nodes = Holder.NodesInSCC.emplace_back(); + Nodes.reserve(Data.Stack.size()); + for (auto Vtx : Data.Stack) { + Nodes.push_back(Vtx); + Holder.SCCOfNode[Vtx] = NewSCC; + } + Data.Stack.clear(); + } +} + +} // namespace detail + +/// Compute SCCs adapted from the paper "A Space-Efficient Algorithm for Finding +/// Strongly Connected Components", Pearce 2015, DOI: +/// +/// +/// \tparam G The graph-type +/// \tparam Iterative Whether to use the iterative or recursive variant of the +/// algorithm (default: true) +/// \param Graph The graph for which to compute SCCs and topological ordering +template +[[nodiscard]] SCCHolder::vertex_t> +computeSCCs(const G &Graph, std::bool_constant /*Iterative*/ = {}) { + using GTraits = psr::GraphTraits; + using Vertex = typename GTraits::vertex_t; + + SCCHolder Ret; + auto N = GTraits::size(Graph); + if (!N) { + return Ret; + } + + Ret.SCCOfNode.resize(N); + + { + detail::Pearce4Data Data(N); + + // for all v ∈ V do if rindex[v]==0 then visit(v) + for (auto V : GTraits::vertices(Graph)) { + if (Data.RIndex[V] == 0) { + if constexpr (Iterative) { + detail::pearce4VisitIt(Graph, V, Data, Ret); + } else { + detail::pearce4VisitRec(Graph, V, Data, Ret); + } + } + } + } + + return Ret; +} + +/// Compute SCCs and a topological ordering on the SCCs, adapted from 
the paper +/// "A Space-Efficient Algorithm for Finding Strongly Connected Components", +/// Pearce 2015, DOI: +/// +/// \tparam G The graph-type \tparam Iterative Whether to use the iterative +/// or recursive variant of the algorithm (default: true) \param Graph The graph +/// for with to compute SCCs and topological ordering +template +[[nodiscard]] std::pair::vertex_t>, + SCCOrder::vertex_t>> +computeSCCsAndTopologicalOrder( + const G &Graph, std::bool_constant /*Iterative*/ = {}) { + using Vertex = typename GraphTraits::vertex_t; + + std::pair, SCCOrder> Ret = { + computeSCCs(Graph, std::bool_constant{}), + {}, + }; + + // Pearce's algorithm produces SCCs in reverse topological order + auto Ids = llvm::reverse(psr::iota>(Ret.first.size())); + Ret.second.SCCIds.append(Ids.begin(), Ids.end()); + + return Ret; +} + } // namespace psr #endif diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index f763507f2a..b9aef51b52 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -211,12 +211,6 @@ struct has_llvm_dense_map_info< std::declval()))>> : std::true_type {}; -template -struct is_incrementable : std::false_type {}; -template -struct is_incrementable())>> - : std::true_type {}; - template struct is_explicitly_convertible_to : std::false_type {}; template @@ -304,9 +298,6 @@ constexpr bool has_llvm_dense_map_info = detail::has_llvm_dense_map_info::value; template using type_identity_t = typename type_identity::type; -template -PSR_CONCEPT is_incrementable = detail::is_incrementable::value; - template PSR_CONCEPT is_explicitly_convertible_to = detail::is_explicitly_convertible_to::value; diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp index 9a54b764ef..7226938501 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -54,14 +54,15 @@ VTAResolver::VTAResolver( 
auto TAG = vta::computeTypeAssignmentGraph( *IRDB, *VTP, AS, *this->BaseResolver, ReachableFunctions); - SCCs = computeSCCs(TAG); + auto [SCCs, Order] = computeSCCsAndTopologicalOrder(TAG); auto Deps = computeSCCDependencies(TAG, SCCs); - auto Order = computeSCCOrder(SCCs, Deps); + TA = vta::propagateTypes(TAG, SCCs, Deps, Order); // TAG.print(llvm::errs()); // TA.print(llvm::errs(), TAG, SCCs); + this->SCCs = std::move(SCCs); Nodes = std::move(TAG.Nodes); } diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index a3e2a22814..8aa0fb1aeb 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright (c) 2024 Fabian Schiebel. + * Copyright (c) 2025 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * @@ -11,7 +11,6 @@ #include "phasar/Utils/AdjacencyList.h" #include "phasar/Utils/EmptyBaseOptimizationUtils.h" -#include "phasar/Utils/GraphTraits.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/TypedVector.h" @@ -19,11 +18,7 @@ #include "gtest/gtest.h" -#include - -//===----------------------------------------------------------------------===// -// Unit tests for the generic SCC algorithm - +namespace { using namespace psr; enum class NodeId : uint32_t {}; @@ -77,27 +72,36 @@ static void compareSCCs(const SCCHolder &ComputedSCCs, } } +static void validateTopologicalOrder(const ExampleGraph &Graph, + const SCCHolder &ComputedSCCs) { + // Note: Pearce's algorithm produces SCCs in reverse-topological order + for (auto [Vtx, SCC] : ComputedSCCs.SCCOfNode.enumerate()) { + for (auto Succ : Graph.Adj[Vtx]) { + auto SuccSCC = ComputedSCCs.SCCOfNode[Succ]; + EXPECT_LE(+SuccSCC, +SCC); + } + } +} + static void computeSCCsAndCompare(ExampleGraph &Graph, llvm::ArrayRef> ExpectedSCCs) { - auto OutputRec = 
computeSCCs(Graph); - auto OutputIt = computeSCCsIterative(Graph); - ASSERT_EQ(OutputIt.SCCOfNode.size(), Graph.Adj.size()) - << "Iterative Approach did not reach all nodes\n"; - ASSERT_EQ(OutputRec.SCCOfNode.size(), Graph.Adj.size()) - << "Recursive Approach did not reach all nodes\n"; + auto ComputedSCCs = computeSCCs(Graph); + ASSERT_EQ(ComputedSCCs.SCCOfNode.size(), Graph.Adj.size()) + << "Pearce's Approach did not reach all nodes\n"; #if __cplusplus >= 202002L - [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, OutputRec); + [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, ComputedSCCs); static_assert(is_const_graph); #endif auto GroundTruth = makeGTSCCs(ExpectedSCCs); - compareSCCs(OutputRec, GroundTruth, "RecursiveTarjan"); - compareSCCs(OutputIt, GroundTruth, "IterativeTarjan"); + compareSCCs(ComputedSCCs, GroundTruth, "Pearce"); + validateTopologicalOrder(Graph, ComputedSCCs); - // printGraph(Graph, llvm::outs(), "ExampleGraph"); - OutputRec.print(Graph, llvm::outs(), "ExampleGraph"); + if (::testing::Test::HasFailure()) { + ComputedSCCs.print(Graph, llvm::outs(), "ExampleGraph"); + } } TEST(SCCGenericTest, SCCTest01) { @@ -174,6 +178,457 @@ TEST(SCCGenericTest, SCCTest06) { {{0, 1, 2, 3, 4, 5, 6, 7}, {8, 9, 10, 11, 12, 13}, {14}, {15}, {16}}); } +// Note: Following tests generated by ChatGPT + +// SCC test: two disjoint cycles +TEST(SCCGenericTest, SCCTest07) { + ExampleGraph Graph{{{NodeId(1)}, {NodeId(0)}, {NodeId(3)}, {NodeId(2)}}}; + computeSCCsAndCompare(Graph, {{0, 1}, {2, 3}}); +} + +// SCC test: diamond shape, no cycles +TEST(SCCGenericTest, SCCTest08) { + ExampleGraph Graph{{{NodeId(1), NodeId(2)}, {NodeId(3)}, {NodeId(3)}, {}}}; + computeSCCsAndCompare(Graph, {{0}, {1}, {2}, {3}}); +} + +// SCC test: diamond with back edge creating cycle +TEST(SCCGenericTest, SCCTest09) { + ExampleGraph Graph{ + {{NodeId(1), NodeId(2)}, {NodeId(3)}, {NodeId(3)}, {NodeId(0)}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2, 3}}); +} + +// SCC 
test: one self-loop, others acyclic +TEST(SCCGenericTest, SCCTest10) { + ExampleGraph Graph{{{NodeId(0)}, {NodeId(2)}, {}}}; + computeSCCsAndCompare(Graph, {{0}, {1}, {2}}); +} + +// SCC test: disconnected nodes +TEST(SCCGenericTest, SCCTest11) { + ExampleGraph Graph{{{}, {}, {}}}; + computeSCCsAndCompare(Graph, {{0}, {1}, {2}}); +} + +// SCC test: complex graph with two larger SCCs and one singleton +TEST(SCCGenericTest, SCCTest12) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle 0-1-2 + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // cycle 3-4-5 + {}}}; + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6}}); +} + +// SCC test: nested cycles sharing a node +TEST(SCCGenericTest, SCCTest13) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0), NodeId(3)}, + {NodeId(4)}, + {NodeId(2)}}}; + // 0-1-2 form a cycle, and 2-3-4 also cycle back to 2 => all {0,1,2,3,4} + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}}); +} + +// SCC test: long linear chain ending in a self-loop +TEST(SCCGenericTest, SCCTest14) { + ExampleGraph Graph{{{NodeId(1)}, {NodeId(2)}, {NodeId(3)}, {NodeId(3)}}}; + // nodes 0,1,2 feed into 3; node 3 has self-loop + computeSCCsAndCompare(Graph, {{0}, {1}, {2}, {3}}); +} + +// SCC test: three disjoint two-node SCCs (no edges between them) +TEST(SCCGenericTest, SCCTest15) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(0)}, // SCC {0,1} + {NodeId(3)}, + {NodeId(2)}, // SCC {2,3} + {NodeId(5)}, + {NodeId(4)}}}; // SCC {4,5} + computeSCCsAndCompare(Graph, {{0, 1}, {2, 3}, {4, 5}}); +} + +// SCC test: two big SCCs connected by single edge +TEST(SCCGenericTest, SCCTest16) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle 0-1-2 + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3), NodeId(0)}}}; // cycle 3-4-5, with edge 5->0 + // Two SCCs {0,1,2} and {3,4,5}; edge {3,4,5} -> {0,1,2} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}}); +} + +// SCC test: large cycle with attached tail +TEST(SCCGenericTest, SCCTest17) { +
ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(0)}, // cycle 0-1-2-3-4-0 + {NodeId(0)}}}; // tail node 5 -> 0 + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}, {5}}); +} + +// SCC test: two SCCs joined by a “bow-tie” structure +TEST(SCCGenericTest, SCCTest18) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // left cycle {0,1,2} + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // right cycle {3,4,5} + {NodeId(0), NodeId(3)}}}; // node 6 links both + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6}}); +} + +// SCC test: complete bipartite between {0,1} and {2,3} +TEST(SCCGenericTest, SCCTest19) { + ExampleGraph Graph{{{NodeId(2), NodeId(3)}, + {NodeId(2), NodeId(3)}, + {NodeId(0), NodeId(1)}, + {NodeId(0), NodeId(1)}}}; + // All nodes strongly connected + computeSCCsAndCompare(Graph, {{0, 1, 2, 3}}); +} + +// SCC test: three disjoint cycles of different sizes (no edges between them) +TEST(SCCGenericTest, SCCTest20) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle {0,1,2} + {NodeId(4)}, + {NodeId(3)}, // cycle {3,4} + {NodeId(6)}, + {NodeId(5)}}}; // cycle {5,6} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4}, {5, 6}}); +} + +// SCC test: complex graph with interleaved cycles +TEST(SCCGenericTest, SCCTest21) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // cycle {0,1,2} + {NodeId(1), NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // cycle {3,4,5} + {NodeId(7)}, + {NodeId(6)}}}; // cycle {6,7} + // SCCs: {0,1,2}, {3,4,5}, {6,7} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6, 7}}); +} + +// SCC test: “ladder” structure with rungs forming cycles +TEST(SCCGenericTest, SCCTest22) { + ExampleGraph Graph{{{NodeId(1), NodeId(2)}, + {NodeId(0), NodeId(3)}, + {NodeId(0), NodeId(3)}, + {NodeId(1), NodeId(2)}}}; + // Essentially two squares connected; all nodes mutually reachable + computeSCCsAndCompare(Graph, {{0, 1, 2, 3}}); +} + +// SCC test: disconnected large SCCs plus singletons
+TEST(SCCGenericTest, SCCTest23) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // SCC {0,1,2} + {NodeId(4)}, + {NodeId(3)}, // SCC {3,4} + {}, + {}, // nodes 5,6 isolated + {NodeId(9)}, + {NodeId(9)}, + {NodeId(8)}}}; // SCC {8,9} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4}, {5}, {6}, {7}, {8, 9}}); +} + +// SCC test: 12-node graph with 4 SCCs, each of size 3 +TEST(SCCGenericTest, SCCTest24) { + ExampleGraph Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // {0,1,2} + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // {3,4,5} + {NodeId(7)}, + {NodeId(8)}, + {NodeId(6)}, // {6,7,8} + {NodeId(10)}, + {NodeId(11)}, + {NodeId(9)}}}; // {9,10,11} + computeSCCsAndCompare(Graph, {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}); +} + +// SCC test: 15-node graph with one large SCC and dangling tails +TEST(SCCGenericTest, SCCTest25) { + ExampleGraph Graph{{ + {NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(0)}, // {0,1,2,3} + {NodeId(5)}, + {NodeId(4)}, // {4,5} + {NodeId(7)}, + {NodeId(8)}, + {NodeId(6)}, // {6,7,8} + {NodeId(0)}, + {NodeId(4)}, + {NodeId(6)}, // tails into SCCs + {}, + {}, + {} // 3 isolated + }}; + computeSCCsAndCompare( + Graph, + {{0, 1, 2, 3}, {4, 5}, {6, 7, 8}, {9}, {10}, {11}, {12}, {13}, {14}}); +} + +// SCC test: 16-node graph with interlinked clusters +TEST(SCCGenericTest, SCCTest26) { + ExampleGraph Graph{ + {{NodeId(1)}, + {NodeId(2)}, + {NodeId(0)}, // {0,1,2} + {NodeId(4)}, + {NodeId(5)}, + {NodeId(3)}, // {3,4,5} + {NodeId(7)}, + {NodeId(6)}, // {6,7} + {NodeId(9)}, + {NodeId(10)}, + {NodeId(8)}, // {8,9,10} + {NodeId(12)}, + {NodeId(11)}, // {11,12} + {NodeId(0), NodeId(3), NodeId(6), NodeId(8)}, // 13 links clusters + {NodeId(13)}, // 14 -> 13 + {NodeId(14)}}}; // 15 -> 14 -> 13 + computeSCCsAndCompare( + Graph, + {{0, 1, 2}, {3, 4, 5}, {6, 7}, {8, 9, 10}, {11, 12}, {13}, {14}, {15}}); +} + +// SCC test: 18-node graph forming a big cycle plus smaller SCCs +TEST(SCCGenericTest, SCCTest27) { + ExampleGraph 
Graph{{{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(7)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(0)}, // 0-11 cycle + {NodeId(13)}, + {NodeId(12)}, // {12,13} + {NodeId(15)}, + {NodeId(14)}, // {14,15} + {NodeId(17)}, + {NodeId(16)}}}; // {16,17} + computeSCCsAndCompare( + Graph, + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, {12, 13}, {14, 15}, {16, 17}}); +} + +// SCC test: 20-node graph with mixed SCC sizes +TEST(SCCGenericTest, SCCTest28) { + ExampleGraph Graph{{{NodeId(1)}, {NodeId(2)}, + {NodeId(0)}, // {0,1,2} + {NodeId(4)}, {NodeId(5)}, + {NodeId(3)}, // {3,4,5} + {NodeId(7)}, {NodeId(6)}, // {6,7} + {NodeId(9)}, {NodeId(8)}, // {8,9} + {NodeId(10)}, {NodeId(11)}, + {NodeId(10)}, {NodeId(12)}, + {NodeId(15)}, {NodeId(14)}, // {14,15} + {NodeId(17)}, {NodeId(18)}, + {NodeId(19)}, {}}}; // chain 16->17->18->19->isolated + computeSCCsAndCompare(Graph, {{0, 1, 2}, + {3, 4, 5}, + {6, 7}, + {8, 9}, + {10}, + {11}, + {12}, + {13}, + {14, 15}, + {16}, + {17}, + {18}, + {19}}); +} + +// SCC test: 25-node graph, 5 clusters of 5 nodes each forming cycles +TEST(SCCGenericTest, SCCTest29) { + ExampleGraph Graph{// Cluster 0: nodes 0-4 cycle + {{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(0)}, + // Cluster 1: nodes 5-9 cycle + {NodeId(6)}, + {NodeId(7)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(5)}, + // Cluster 2: nodes 10-14 cycle + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13)}, + {NodeId(14)}, + {NodeId(10)}, + // Cluster 3: nodes 15-19 cycle + {NodeId(16)}, + {NodeId(17)}, + {NodeId(18)}, + {NodeId(19)}, + {NodeId(15)}, + // Cluster 4: nodes 20-24 cycle + {NodeId(21)}, + {NodeId(22)}, + {NodeId(23)}, + {NodeId(24)}, + {NodeId(20)}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}); +} + +// SCC test: 25-node graph, one giant SCC (0-19 cycle) plus 5 isolated nodes 
+TEST(SCCGenericTest, SCCTest30) { + ExampleGraph Graph{// Giant cycle through 0..19 + {{NodeId(1)}, + {NodeId(2)}, + {NodeId(3)}, + {NodeId(4)}, + {NodeId(5)}, + {NodeId(6)}, + {NodeId(7)}, + {NodeId(8)}, + {NodeId(9)}, + {NodeId(10)}, + {NodeId(11)}, + {NodeId(12)}, + {NodeId(13)}, + {NodeId(14)}, + {NodeId(15)}, + {NodeId(16)}, + {NodeId(17)}, + {NodeId(18)}, + {NodeId(19)}, + {NodeId(0)}, + // Isolated nodes 20-24 + {}, + {}, + {}, + {}, + {}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, + {20}, + {21}, + {22}, + {23}, + {24}}); +} + +// SCC test: 25-node graph, 5 SCC clusters of size 5, linked in DAG +TEST(SCCGenericTest, SCCTest31) { + ExampleGraph Graph{ + {// 0..24 + /* 0 */ { + NodeId(1), + NodeId(5)}, // cycle 0->1->2->3->4->0 and cross 0->5 (to cluster1) + /* 1 */ {NodeId(2)}, + /* 2 */ {NodeId(3)}, + /* 3 */ {NodeId(4)}, + /* 4 */ {NodeId(0)}, + /* 5 */ {NodeId(6)}, // cluster1 + /* 6 */ {NodeId(7), NodeId(10)}, // 6->7 and cross 6->10 (to cluster2) + /* 7 */ {NodeId(8)}, + /* 8 */ {NodeId(9)}, + /* 9 */ {NodeId(5)}, + /*10 */ {NodeId(11)}, // cluster2 + /*11 */ {NodeId(12)}, + /*12 */ {NodeId(13), NodeId(15)}, // 12->13 and cross 12->15 (to + // cluster3) + /*13 */ {NodeId(14)}, + /*14 */ {NodeId(10)}, + /*15 */ {NodeId(16)}, // cluster3 + /*16 */ {NodeId(17)}, + /*17 */ {NodeId(18), NodeId(20)}, // 17->18 and cross 17->20 (to + // cluster4) + /*18 */ {NodeId(19)}, + /*19 */ {NodeId(15)}, + /*20 */ {NodeId(21)}, // cluster4 + /*21 */ {NodeId(22)}, + /*22 */ {NodeId(23)}, + /*23 */ {NodeId(24)}, + /*24 */ {NodeId(20)}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}); +} + +// SCC test: 25-node graph, one giant SCC (0-19 cycle) plus 5 isolated nodes, +// with edges from the big SCC to the isolated nodes +TEST(SCCGenericTest, SCCTest32) { + ExampleGraph Graph{ + {// 0..24 + /* 0 */ {NodeId(1), + 
NodeId(20)}, // cycle 0->1->...->19->0 and extra 0->20 + /* 1 */ {NodeId(2)}, + /* 2 */ {NodeId(3)}, + /* 3 */ {NodeId(4)}, + /* 4 */ {NodeId(5)}, + /* 5 */ {NodeId(6), NodeId(21)}, // 5->6 and extra 5->21 + /* 6 */ {NodeId(7)}, + /* 7 */ {NodeId(8)}, + /* 8 */ {NodeId(9)}, + /* 9 */ {NodeId(10)}, + /*10 */ {NodeId(11), NodeId(22)}, // 10->11 and extra 10->22 + /*11 */ {NodeId(12)}, + /*12 */ {NodeId(13)}, + /*13 */ {NodeId(14)}, + /*14 */ {NodeId(15)}, + /*15 */ {NodeId(16), NodeId(23)}, // 15->16 and extra 15->23 + /*16 */ {NodeId(17)}, + /*17 */ {NodeId(18)}, + /*18 */ {NodeId(19)}, + /*19 */ {NodeId(0), NodeId(24)}, // 19->0 and extra 19->24 + /*20 */ {}, // isolated singletons + /*21 */ {}, + /*22 */ {}, + /*23 */ {}, + /*24 */ {}}}; + + computeSCCsAndCompare(Graph, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, + {20}, + {21}, + {22}, + {23}, + {24}}); +} + +} // namespace + // main function for the test case int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); From 9b7bf9eea018ab5edb6d6123b20ccc227c3489be Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 2 Oct 2025 20:56:49 +0200 Subject: [PATCH 26/27] Also test recursive version of SCC computation --- include/phasar/Utils/SCCGeneric.h | 38 ++++++++++++++---------------- unittests/Utils/SCCGenericTest.cpp | 32 ++++++++++++++++++------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index 1663089ae6..55e85d6e34 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -488,17 +488,6 @@ pearce4VisitIt(const G &Graph, typename GraphTraits::vertex_t Start, break; } } - - if (!Data.Stack.empty()) { - auto NewSCC = SCCId(Holder.NodesInSCC.size()); - auto &Nodes = Holder.NodesInSCC.emplace_back(); - Nodes.reserve(Data.Stack.size()); - for (auto Vtx : Data.Stack) { - Nodes.push_back(Vtx); - Holder.SCCOfNode[Vtx] = NewSCC; - } - 
Data.Stack.clear(); - } } } // namespace detail @@ -525,17 +514,26 @@ computeSCCs(const G &Graph, std::bool_constant /*Iterative*/ = {}) { Ret.SCCOfNode.resize(N); - { - detail::Pearce4Data Data(N); + detail::Pearce4Data Data(N); - // for all v ∈ V do if rindex[v]==0 then visit(v) - for (auto V : GTraits::vertices(Graph)) { - if (Data.RIndex[V] == 0) { - if constexpr (Iterative) { - detail::pearce4VisitIt(Graph, V, Data, Ret); - } else { - detail::pearce4VisitRec(Graph, V, Data, Ret); + // for all v ∈ V do if rindex[v]==0 then visit(v) + for (auto V : GTraits::vertices(Graph)) { + if (Data.RIndex[V] == 0) { + if constexpr (Iterative) { + detail::pearce4VisitIt(Graph, V, Data, Ret); + } else { + detail::pearce4VisitRec(Graph, V, Data, Ret); + } + + if (!Data.Stack.empty()) { + auto NewSCC = SCCId(Ret.NodesInSCC.size()); + auto &Nodes = Ret.NodesInSCC.emplace_back(); + Nodes.reserve(Data.Stack.size()); + for (auto Vtx : Data.Stack) { + Nodes.push_back(Vtx); + Ret.SCCOfNode[Vtx] = NewSCC; } + Data.Stack.clear(); } } } diff --git a/unittests/Utils/SCCGenericTest.cpp b/unittests/Utils/SCCGenericTest.cpp index 8aa0fb1aeb..3e3f2943c5 100644 --- a/unittests/Utils/SCCGenericTest.cpp +++ b/unittests/Utils/SCCGenericTest.cpp @@ -18,6 +18,8 @@ #include "gtest/gtest.h" +#include + namespace { using namespace psr; @@ -73,12 +75,15 @@ static void compareSCCs(const SCCHolder &ComputedSCCs, } static void validateTopologicalOrder(const ExampleGraph &Graph, - const SCCHolder &ComputedSCCs) { + const SCCHolder &ComputedSCCs, + std::string_view ComputedName) { // Note: Pearce's algorithm produces SCCs in reverse-topological order for (auto [Vtx, SCC] : ComputedSCCs.SCCOfNode.enumerate()) { for (auto Succ : Graph.Adj[Vtx]) { auto SuccSCC = ComputedSCCs.SCCOfNode[Succ]; - EXPECT_LE(+SuccSCC, +SCC); + EXPECT_LE(+SuccSCC, +SCC) + << "Invalid topological order in " << ComputedName << ": SCC #" + << +SCC << " must come before #" << +SuccSCC; } } } @@ -86,21 +91,30 @@ static void 
validateTopologicalOrder(const ExampleGraph &Graph, static void computeSCCsAndCompare(ExampleGraph &Graph, llvm::ArrayRef> ExpectedSCCs) { - auto ComputedSCCs = computeSCCs(Graph); - ASSERT_EQ(ComputedSCCs.SCCOfNode.size(), Graph.Adj.size()) - << "Pearce's Approach did not reach all nodes\n"; + auto ComputedSCCsIt = computeSCCs(Graph); + auto ComputedSCCsRec = computeSCCs(Graph, std::false_type{}); + ASSERT_EQ(ComputedSCCsIt.SCCOfNode.size(), Graph.Adj.size()) + << "Iterative Pearce's Approach did not reach all nodes\n"; + ASSERT_EQ(ComputedSCCsRec.SCCOfNode.size(), Graph.Adj.size()) + << "Recursive Pearce's Approach did not reach all nodes\n"; #if __cplusplus >= 202002L - [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, ComputedSCCs); + [[maybe_unused]] auto SCCDeps = computeSCCDependencies(Graph, ComputedSCCsIt); static_assert(is_const_graph); #endif auto GroundTruth = makeGTSCCs(ExpectedSCCs); - compareSCCs(ComputedSCCs, GroundTruth, "Pearce"); - validateTopologicalOrder(Graph, ComputedSCCs); + compareSCCs(ComputedSCCsIt, GroundTruth, "Pearce Iterative"); + validateTopologicalOrder(Graph, ComputedSCCsIt, "Pearce Iterative"); + if (::testing::Test::HasFailure()) { + ComputedSCCsIt.print(Graph, llvm::outs(), "ExampleGraph"); + return; + } + compareSCCs(ComputedSCCsRec, GroundTruth, "Pearce Recursive"); + validateTopologicalOrder(Graph, ComputedSCCsRec, "Pearce Recursive"); if (::testing::Test::HasFailure()) { - ComputedSCCs.print(Graph, llvm::outs(), "ExampleGraph"); + ComputedSCCsRec.print(Graph, llvm::outs(), "ExampleGraph"); } } From ab665ab5e2480196642686d8b929b34baa8d1913 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 25 Oct 2025 19:07:01 +0200 Subject: [PATCH 27/27] Use AliasIterator in VTA call-graph analysis --- .../ControlFlow/Resolver/VTAResolver.h | 33 ++----------- .../ControlFlow/VTA/TypeAssignmentGraph.h | 13 ++--- .../ControlFlow/Resolver/VTAResolver.cpp | 34 ++----------- .../ControlFlow/VTA/TypeAssignmentGraph.cpp | 48
++++++++++--------- 4 files changed, 39 insertions(+), 89 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h index 4146974f41..f643f0edd0 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/VTAResolver.h @@ -41,41 +41,16 @@ class VTAResolver : public Resolver { }; /// Constructs a VTAResolver with a given pre-computed call-graph and - /// call-back based alias-information (to-be-replaced by AliasIterator once - /// available #783) + /// alias-information /// /// Builds the type-assignment graph and propagates allocated types though /// it's SCCs. explicit VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, + const LLVMVFTableProvider *VTP, LLVMAliasIteratorRef AS, MaybeUniquePtr BaseCG); - /// Constructs a VTAResolver with a given pre-computed call-graph and - /// LLVMAliasInfoRef alias-information. - /// - /// Builds the type-assignment graph and propagates allocated types though - /// it's SCCs. - explicit VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, - MaybeUniquePtr BaseCG); - - /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) - /// and call-back based alias-information (to-be-replaced by AliasIterator - /// once available #783). - /// Uses the optional parameter ReachableFunctions to consider only a subset - /// of all functions for building the type-assignment graph - /// - /// Builds the type-assignment graph and propagates allocated types though - /// it's SCCs. 
- explicit VTAResolver( - const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, - llvm::function_ref)> - ReachableFunctions = DefaultReachableFunctions{}); - /// Constructs a VTAResolver with a given base-resolver (no base-call-graph) - /// and LLVMAliasInfoRef alias-information. + /// and alias-information /// Uses the optional parameter ReachableFunctions to consider only a subset /// of all functions for building the type-assignment graph /// @@ -83,7 +58,7 @@ class VTAResolver : public Resolver { /// it's SCCs. explicit VTAResolver( const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, + LLVMAliasIteratorRef AS, MaybeUniquePtr BaseRes, llvm::function_ref)> ReachableFunctions = DefaultReachableFunctions{}); diff --git a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h index cdd07b3b41..0e4db14a17 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h +++ b/include/phasar/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.h @@ -154,18 +154,15 @@ struct TypeAssignmentGraph { void print(llvm::raw_ostream &OS); }; -using AliasHandlerTy = llvm::function_ref; -using AliasInfoTy = llvm::function_ref; - using ReachableFunsHandlerTy = llvm::function_ref; using ReachableFunsTy = llvm::function_ref; -// TODO: Use AliasIterator here, once available #783 -[[nodiscard]] TypeAssignmentGraph computeTypeAssignmentGraph( - const LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, - AliasInfoTy AS, Resolver &BaseRes, ReachableFunsTy ReachableFunctions); +[[nodiscard]] TypeAssignmentGraph +computeTypeAssignmentGraph(const LLVMProjectIRDB &IRDB, + const psr::LLVMVFTableProvider &VTP, + LLVMAliasIteratorRef AS, Resolver &BaseRes, + ReachableFunsTy ReachableFunctions); void printNode(llvm::raw_ostream &OS, TAGNode TN); }; // namespace psr::vta diff --git 
a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp index 7226938501..e73950191a 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/VTAResolver.cpp @@ -23,7 +23,7 @@ void VTAResolver::DefaultReachableFunctions::operator()( static VTAResolver createWithBaseCGResolver( const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - MaybeUniquePtr BaseCG, vta::AliasInfoTy AS) { + MaybeUniquePtr BaseCG, LLVMAliasIteratorRef AS) { auto ReachableFunctions = [BaseCG = BaseCG.get()]( const LLVMProjectIRDB &, @@ -37,14 +37,15 @@ static VTAResolver createWithBaseCGResolver( } VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, vta::AliasInfoTy AS, + const LLVMVFTableProvider *VTP, + LLVMAliasIteratorRef AS, MaybeUniquePtr BaseCG) : psr::VTAResolver( createWithBaseCGResolver(IRDB, VTP, std::move(BaseCG), AS)) {} VTAResolver::VTAResolver( const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - vta::AliasInfoTy AS, MaybeUniquePtr BaseRes, + LLVMAliasIteratorRef AS, MaybeUniquePtr BaseRes, llvm::function_ref)> ReachableFunctions) @@ -66,33 +67,6 @@ VTAResolver::VTAResolver( Nodes = std::move(TAG.Nodes); } -VTAResolver::VTAResolver(const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, LLVMAliasInfoRef AS, - MaybeUniquePtr BaseCG) - : VTAResolver( - IRDB, VTP, - [AS](const llvm::Value *Ptr, const llvm::Instruction *At, - vta::AliasHandlerTy WithAlias) { - auto ASet = AS.getAliasSet(Ptr, At); - llvm::for_each(*ASet, WithAlias); - }, - std::move(BaseCG)) {} - -VTAResolver::VTAResolver( - const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, - LLVMAliasInfoRef AS, MaybeUniquePtr BaseRes, - llvm::function_ref)> - ReachableFunctions) - : VTAResolver( - IRDB, VTP, - [AS](const llvm::Value *Ptr, const llvm::Instruction *At, - vta::AliasHandlerTy WithAlias) { - auto ASet = AS.getAliasSet(Ptr, At); - llvm::for_each(*ASet, 
WithAlias); - }, - std::move(BaseRes), ReachableFunctions) {} - std::string VTAResolver::str() const { return "VTA"; } void VTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, diff --git a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp index 9ca875bd37..706ecb4729 100644 --- a/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp +++ b/lib/PhasarLLVM/ControlFlow/VTA/TypeAssignmentGraph.cpp @@ -312,7 +312,8 @@ static void handleGEP(const llvm::GetElementPtrInst *GEP, } static bool handleEntryForStore(const llvm::StoreInst *Store, - TypeAssignmentGraph &TAG, AliasInfoTy AI, + TypeAssignmentGraph &TAG, + LLVMAliasIteratorRef AI, const llvm::DataLayout &DL) { const auto *Base = llvm::dyn_cast( Store->getValueOperand()->stripPointerCastsAndAliases()); @@ -337,21 +338,22 @@ static bool handleEntryForStore(const llvm::StoreInst *Store, } } - AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { - // XXX: Fuse store and GEP! + AI.forallAliasesOf(Store->getPointerOperand(), Store, + [&](const llvm::Value *Dest) { + // XXX: Fuse store and GEP! - auto DestNodeId = TAG.get({Variable{Dest}}); - if (!DestNodeId) { - return; - } + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) { + return; + } - TAG.TypeEntryPoints[*DestNodeId].insert(Base); - }); + TAG.TypeEntryPoints[*DestNodeId].insert(Base); + }); return true; } static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, - AliasInfoTy AI, const llvm::DataLayout &DL) { + LLVMAliasIteratorRef AI, const llvm::DataLayout &DL) { if (handleEntryForStore(Store, TAG, AI, DL)) { return; @@ -378,16 +380,17 @@ static void handleStore(const llvm::StoreInst *Store, TypeAssignmentGraph &TAG, } } - AI(Store->getPointerOperand(), Store, [&](const llvm::Value *Dest) { - // XXX: Fuse store and GEP! + AI.forallAliasesOf(Store->getPointerOperand(), Store, + [&](const llvm::Value *Dest) { + // XXX: Fuse store and GEP! 
- auto DestNodeId = TAG.get({Variable{Dest}}); - if (!DestNodeId) { - return; - } + auto DestNodeId = TAG.get({Variable{Dest}}); + if (!DestNodeId) { + return; + } - TAG.addEdge(*From, *DestNodeId); - }); + TAG.addEdge(*From, *DestNodeId); + }); } static void handleLoad(const llvm::LoadInst *Load, TypeAssignmentGraph &TAG, @@ -543,7 +546,7 @@ static void handleReturn(const llvm::ReturnInst *Ret, } static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, - Resolver &BaseRes, AliasInfoTy AI, + Resolver &BaseRes, LLVMAliasIteratorRef AI, const llvm::DataLayout &DL, const psr::LLVMVFTableProvider &VTP) { if (llvm::isa(&I)) { @@ -590,7 +593,7 @@ static void dispatch(const llvm::Instruction &I, TypeAssignmentGraph &TAG, } static void buildTAGWithFun(const llvm::Function *Fun, TypeAssignmentGraph &TAG, - Resolver &BaseRes, AliasInfoTy AI, + Resolver &BaseRes, LLVMAliasIteratorRef AI, const llvm::DataLayout &DL, const psr::LLVMVFTableProvider &VTP) { for (const auto &I : llvm::instructions(Fun)) { @@ -599,7 +602,8 @@ static void buildTAGWithFun(const llvm::Function *Fun, TypeAssignmentGraph &TAG, } static auto computeTypeAssignmentGraphImpl(const LLVMProjectIRDB &IRDB, - Resolver &BaseRes, AliasInfoTy AI, + Resolver &BaseRes, + LLVMAliasIteratorRef AI, const psr::LLVMVFTableProvider &VTP, ReachableFunsTy ReachableFunctions) -> TypeAssignmentGraph { @@ -626,7 +630,7 @@ static auto computeTypeAssignmentGraphImpl(const LLVMProjectIRDB &IRDB, auto vta::computeTypeAssignmentGraph(const LLVMProjectIRDB &IRDB, const psr::LLVMVFTableProvider &VTP, - AliasInfoTy AS, Resolver &BaseRes, + LLVMAliasIteratorRef AS, Resolver &BaseRes, ReachableFunsTy ReachableFunctions) -> TypeAssignmentGraph {