Skip to content

Commit 3b24ab6

Browse files
authored
[Olap projection pushdown] Keep any expression for json_value in compute (#28421)
1 parent f29a32a commit 3b24ab6

File tree

2 files changed

+78
-53
lines changed

2 files changed

+78
-53
lines changed

ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp

Lines changed: 44 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -794,67 +794,64 @@ std::pair<TVector<TOLAPPredicateNode>, TVector<TOLAPPredicateNode>> SplitForPart
794794
return {pushable, remaining};
795795
}
796796

797-
bool IsSuitableToCollectProjection(TExprBase node) {
798-
// Currently support only `JsonDocument`.
799-
if (auto maybeJsonValue = node.Maybe<TCoJsonValue>()) {
800-
auto jsonMember = maybeJsonValue.Cast().Json().Maybe<TCoMember>();
801-
auto jsonPath = maybeJsonValue.Cast().JsonPath().Maybe<TCoUtf8>();
802-
return jsonMember && jsonPath;
803-
}
804-
return false;
797+
TExprNode::TPtr IsSuitableToCollectProjection(TExprNode::TPtr node) {
798+
// Currently support only `JsonValue`.
799+
auto jsonValuePred = [](const TExprNode::TPtr& node) -> bool { return !!TMaybeNode<TCoJsonValue>(node); };
800+
if (auto jsonValues = FindNodes(node, jsonValuePred); jsonValues.size() == 1) {
801+
auto jsonValue = TExprBase(jsonValues.front()).Cast<TCoJsonValue>();
802+
return jsonValue.Json().Maybe<TCoMember>() && jsonValue.JsonPath().Maybe<TCoUtf8>() ? jsonValue.Ptr() : nullptr;
803+
}
804+
return nullptr;
805805
}
806806

807807
// Collects all operations for projections and returns a vector of pair - [columName, olap operation].
808808
TVector<std::pair<TString, TExprNode::TPtr>> CollectOlapOperationsForProjections(const TExprNode::TPtr& node, const TExprNode& arg,
809809
TNodeOnNodeOwnedMap& replaces,
810810
const THashSet<TString>& predicateMembers,
811811
TExprContext& ctx) {
812+
TVector<std::pair<TString, TExprNode::TPtr>> olapOperationsForProjections;
812813
auto asStructPred = [](const TExprNode::TPtr& node) -> bool { return !!TMaybeNode<TCoAsStruct>(node); };
813-
auto memberPred = [](const TExprNode::TPtr& node) { return !!TMaybeNode<TCoMember>(node); };
814+
auto memberPred = [](const TExprNode::TPtr& node) -> bool { return !!TMaybeNode<TCoMember>(node); };
814815
THashSet<TString> projectionMembers;
815816
THashSet<TString> notSuitableToPushMembers;
816817
ui32 nextMemberId = 0;
817818

818-
TVector<std::pair<TString, TExprNode::TPtr>> olapOperationsForProjections;
819+
TVector<std::tuple<TString, TExprNode::TPtr, TExprNode::TPtr, TExprNode::TPtr>> projectionCandidates;
819820
// Expressions for projections are placed in `AsStruct` callable.
820821
if (auto asStruct = FindNode(node, asStructPred)) {
821822
// Process each child for `AsStruct` callable.
822823
for (auto child : TExprBase(asStruct).Cast<TCoAsStruct>()) {
823824
bool memberCollected = false;
824-
if (IsSuitableToCollectProjection(child.Item(1))) {
825-
// Search for the `TCoMember` in expression, we need expression with only one `TCoMember`.
826-
if (auto originalMembers = FindNodes(child.Item(1).Ptr(), memberPred); originalMembers.size() == 1) {
827-
// Convert YQL op to OLAP op.
828-
if (auto olapOperations = ConvertComparisonNode(TExprBase(child.Item(1)), arg, ctx, node->Pos(), false);
829-
olapOperations.size() == 1) {
830-
auto originalMember = TExprBase(originalMembers.front()).Cast<TCoMember>();
831-
auto originalMemberName = TString(originalMember.Name());
832-
833-
if (!predicateMembers.contains(originalMemberName)) {
834-
if (projectionMembers.contains(originalMemberName)) {
835-
originalMemberName = "__kqp_olap_projection_" + originalMemberName + ToString(nextMemberId++);
836-
} else {
837-
projectionMembers.insert(originalMemberName);
838-
}
839-
840-
// clang-format off
841-
auto newMember = Build<TCoMember>(ctx, node->Pos())
842-
.Struct(originalMember.Struct())
843-
.Name<TCoAtom>()
844-
.Value(originalMemberName)
845-
.Build()
846-
.Done();
847-
// clang-format on
848-
849-
auto olapOperation = olapOperations.front();
850-
// Replace full expression with only member.
851-
replaces[child.Item(1).Raw()] = newMember.Ptr();
852-
olapOperationsForProjections.emplace_back(TString(newMember.Name()), olapOperation.Ptr());
853-
memberCollected = true;
854-
855-
YQL_CLOG(TRACE, ProviderKqp)
856-
<< "[OLAP PROJECTION] Operation in olap dialect: " << KqpExprToPrettyString(olapOperation, ctx);
825+
TExprNode::TPtr nodeToProcess = child.Item(1).Ptr();
826+
if (auto projection = IsSuitableToCollectProjection(nodeToProcess)) {
827+
if (auto olapOperations = ConvertComparisonNode(TExprBase(projection), arg, ctx, node->Pos(), false);
828+
olapOperations.size() == 1) {
829+
830+
Y_ENSURE(TMaybeNode<TCoMember>(projection->ChildPtr(0)));
831+
auto originalMember = TExprBase(projection->ChildPtr(0)).Cast<TCoMember>();
832+
auto originalMemberName = TString(originalMember.Name());
833+
834+
if (!predicateMembers.contains(originalMemberName)) {
835+
if (projectionMembers.contains(originalMemberName)) {
836+
originalMemberName = "__kqp_olap_projection_" + originalMemberName + ToString(nextMemberId++);
837+
} else {
838+
projectionMembers.insert(originalMemberName);
857839
}
840+
841+
// clang-format off
842+
auto replace = Build<TCoMember>(ctx, node->Pos())
843+
.Struct(originalMember.Struct())
844+
.Name<TCoAtom>()
845+
.Value(originalMemberName)
846+
.Build()
847+
.Done().Ptr();
848+
// clang-format on
849+
850+
auto olapOperation = olapOperations.front();
851+
projectionCandidates.push_back({TString(originalMemberName), projection, replace, olapOperation.Ptr()});
852+
memberCollected = true;
853+
YQL_CLOG(TRACE, ProviderKqp)
854+
<< "[OLAP PROJECTION] Operation in olap dialect: " << KqpExprToPrettyString(olapOperation, ctx);
858855
}
859856
}
860857
}
@@ -867,9 +864,10 @@ TVector<std::pair<TString, TExprNode::TPtr>> CollectOlapOperationsForProjections
867864
}
868865
}
869866

870-
for (const auto& [colName, expr] : olapOperationsForProjections) {
871-
if (notSuitableToPushMembers.count(colName)) {
872-
return {};
867+
for (const auto& [colName, projection, replace, olapOperation] : projectionCandidates) {
868+
if (!notSuitableToPushMembers.count(colName)) {
869+
replaces[TExprBase(projection).Raw()] = replace;
870+
olapOperationsForProjections.emplace_back(colName, olapOperation);
873871
}
874872
}
875873

ydb/core/kqp/ut/olap/kqp_olap_ut.cpp

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,6 +1771,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
17711771
b Int32,
17721772
timestamp Timestamp,
17731773
jsonDoc JsonDocument,
1774+
jsonDoc1 JsonDocument,
17741775
primary key(a)
17751776
)
17761777
PARTITION BY HASH(a)
@@ -1780,12 +1781,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
17801781

17811782

17821783
auto insertRes = session2.ExecuteQuery(R"(
1783-
INSERT INTO `/Root/foo` (a, b, timestamp, jsonDoc)
1784-
VALUES (1, 1, Timestamp("1970-01-01T00:00:03.000001Z"), JsonDocument('{"a.b.c" : "a1", "b.c.d" : "b1", "c.d.e" : "c1"}'));
1785-
INSERT INTO `/Root/foo` (a, b, timestamp, jsonDoc)
1786-
VALUES (2, 11, Timestamp("1970-01-01T00:00:03.000001Z"), JsonDocument('{"a.b.c" : "a2", "b.c.d" : "b2", "c.d.e" : "c2"}'));
1787-
INSERT INTO `/Root/foo` (a, b, timestamp, jsonDoc)
1788-
VALUES (3, 11, Timestamp("1970-01-01T00:00:03.000001Z"), JsonDocument('{"b.c.a" : "a3", "b.c.d" : "b3", "c.d.e" : "c3"}'));
1784+
INSERT INTO `/Root/foo` (a, b, timestamp, jsonDoc, jsonDoc1)
1785+
VALUES (1, 1, Timestamp("1970-01-01T00:00:03.000001Z"), JsonDocument('{"a.b.c" : "a1", "b.c.d" : "b1", "c.d.e" : "c1"}'), JsonDocument('{"a" : "1.1", "b" : "1.2", "c" : "1.3"}'));
1786+
INSERT INTO `/Root/foo` (a, b, timestamp, jsonDoc, jsonDoc1)
1787+
VALUES (2, 11, Timestamp("1970-01-01T00:00:03.000001Z"), JsonDocument('{"a.b.c" : "a2", "b.c.d" : "b2", "c.d.e" : "c2"}'), JsonDocument('{"a" : "2.1", "b" : "2.2", "c" : "2.3"}'));
1788+
INSERT INTO `/Root/foo` (a, b, timestamp, jsonDoc, jsonDoc1)
1789+
VALUES (3, 11, Timestamp("1970-01-01T00:00:03.000001Z"), JsonDocument('{"b.c.a" : "a3", "b.c.d" : "b3", "c.d.e" : "c3"}'), JsonDocument('{"x" : "3.1", "y" : "1.2", "z" : "1.3"}'));
17891790
)", NYdb::NQuery::TTxControl::NoTx()).GetValueSync();
17901791
UNIT_ASSERT(insertRes.IsSuccess());
17911792

@@ -1830,6 +1831,29 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
18301831
SELECT a, JSON_VALUE(jsonDoc, "$.\"a.b.c\""), JSON_VALUE(jsonDoc, "$.\"b.c.d\""), JSON_VALUE(jsonDoc, "$.\"c.d.e\"")
18311832
FROM `/Root/foo`
18321833
ORDER BY a;
1834+
)",
1835+
R"(
1836+
PRAGMA Kikimr.OptEnableOlapPushdownProjections = "true";
1837+
1838+
SELECT CAST(JSON_VALUE(jsonDoc1, "$.\"a\"") as Double) as result
1839+
FROM `/Root/foo`
1840+
ORDER BY result;
1841+
)",
1842+
R"(
1843+
PRAGMA Kikimr.OptEnableOlapPushdownProjections = "true";
1844+
1845+
SELECT (JSON_VALUE(jsonDoc, "$.\"a.b.c\"") in ["a1", "a3", "a4"]) as col1, CAST(JSON_VALUE(jsonDoc1, "$.\"a\"") as Double) as col2
1846+
FROM `/Root/foo`
1847+
ORDER BY col2;
1848+
)",
1849+
R"(
1850+
PRAGMA Kikimr.OptEnableOlapPushdownProjections = "true";
1851+
1852+
SELECT (JSON_VALUE(jsonDoc, "$.\"a.b.c\"") in ["a1", "a3", "a4"]) as col1,
1853+
CAST(JSON_VALUE(jsonDoc1, "$.\"a\"") as Double) as col2,
1854+
CAST(JSON_VALUE(jsonDoc1, "$.\"b\"") as Double) as col3
1855+
FROM `/Root/foo`
1856+
ORDER BY col2;
18331857
)"
18341858
};
18351859

@@ -1838,7 +1862,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
18381862
R"([[[3000001u];["a1"];1u];[[3000001u];["a2"];1u]])",
18391863
R"([[[3000001u];#;1u];[[3000001u];["a1"];1u];[[3000001u];["a2"];1u]])",
18401864
R"([[[3000001u];["a1"];1u];[[3000001u];["a2"];1u]])",
1841-
R"([[1;["a1"];["b1"];["c1"]];[2;["a2"];["b2"];["c2"]];[3;#;["b3"];["c3"]]])"
1865+
R"([[1;["a1"];["b1"];["c1"]];[2;["a2"];["b2"];["c2"]];[3;#;["b3"];["c3"]]])",
1866+
R"([[#];[[1.1]];[[2.1]]])",
1867+
R"([[#;#];[[%true];[1.1]];[[%false];[2.1]]])",
1868+
R"([[#;#;#];[[%true];[1.1];[1.2]];[[%false];[2.1];[2.2]]])"
18421869
};
18431870

18441871
for (ui32 i = 0; i < queries.size(); ++i) {

0 commit comments

Comments
 (0)