Skip to content

Commit fb14d7c

Browse files
authored
fix: Implement Substrait consumer support for like_match, like_imatch, and negated variants (#18929)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #16293 ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> Currently, DataFusion fails to consume Substrait plans that utilize `like_match`, `like_imatch`, or their negated variants (`like_not_match`, `like_not_imatch`). This results in a panic with `DataFusion error: This feature is not implemented: Unsupported function name: "like_match"` during round-trip planning. This PR implements the missing mapping logic in the Substrait consumer to correctly translate these function names into DataFusion `Expr::Like` expressions. ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> - Updated `BuiltinExprBuilder` in `scalar_function.rs` to recognize: - `like_match` (LIKE) - `like_imatch` (ILIKE) - `like_not_match` (NOT LIKE) - `like_not_imatch` (NOT ILIKE) - Modified the internal `build_like_expr` helper to accept a `negated` boolean flag (previously hardcoded to `false`). - Added a new unit test `test_like_match_conversion` to verify the correct conversion of these functions and their arguments. ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> Yes. I added a new unit test `test_like_match_conversion` in `datafusion/substrait/src/logical_plan/consumer/expr/scalar_function.rs`. This test explicitly constructs Substrait plans for `like_match` and `like_not_match` and asserts that they are consumed into correct DataFusion `Expr::Like` variants with the appropriate `negated` and `case_insensitive` flags. **Note: Verification was done via unit test rather than the standard `sqllogictest` suite because `predicates.slt` was experiencing unrelated environment-specific failures locally.** ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> - No breaking changes. - Users relying on Substrait round-trips for LIKE/ILIKE expressions will now see these queries succeed instead of failing.
1 parent c6f7363 commit fb14d7c

File tree

1 file changed

+103
-5
lines changed

1 file changed

+103
-5
lines changed

datafusion/substrait/src/logical_plan/consumer/expr/scalar_function.rs

Lines changed: 103 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,9 @@ struct BuiltinExprBuilder {
177177
impl BuiltinExprBuilder {
178178
pub fn try_from_name(name: &str) -> Option<Self> {
179179
match name {
180-
"not" | "like" | "ilike" | "is_null" | "is_not_null" | "is_true"
181-
| "is_false" | "is_not_true" | "is_not_false" | "is_unknown"
180+
"not" | "like" | "ilike" | "like_match" | "like_imatch"
181+
| "like_not_match" | "like_not_imatch" | "is_null" | "is_not_null"
182+
| "is_true" | "is_false" | "is_not_true" | "is_not_false" | "is_unknown"
182183
| "is_not_unknown" | "negative" | "negate" | "and_not" | "xor"
183184
| "between" | "logb" => Some(Self {
184185
expr_name: name.to_string(),
@@ -194,8 +195,12 @@ impl BuiltinExprBuilder {
194195
args: Vec<Expr>,
195196
) -> Result<Expr> {
196197
match self.expr_name.as_str() {
197-
"like" => Self::build_like_expr(false, f, args).await,
198-
"ilike" => Self::build_like_expr(true, f, args).await,
198+
"like" => Self::build_like_expr(false, false, f, args).await,
199+
"ilike" => Self::build_like_expr(true, false, f, args).await,
200+
"like_match" => Self::build_like_expr(false, false, f, args).await,
201+
"like_imatch" => Self::build_like_expr(true, false, f, args).await,
202+
"like_not_match" => Self::build_like_expr(false, true, f, args).await,
203+
"like_not_imatch" => Self::build_like_expr(true, true, f, args).await,
199204
"not" | "negative" | "negate" | "is_null" | "is_not_null" | "is_true"
200205
| "is_false" | "is_not_true" | "is_not_false" | "is_unknown"
201206
| "is_not_unknown" => Self::build_unary_expr(&self.expr_name, args).await,
@@ -236,6 +241,7 @@ impl BuiltinExprBuilder {
236241

237242
async fn build_like_expr(
238243
case_insensitive: bool,
244+
negated: bool,
239245
f: &ScalarFunction,
240246
args: Vec<Expr>,
241247
) -> Result<Expr> {
@@ -274,7 +280,7 @@ impl BuiltinExprBuilder {
274280
};
275281

276282
Ok(Expr::Like(Like {
277-
negated: false,
283+
negated,
278284
expr: Box::new(expr),
279285
pattern: Box::new(pattern),
280286
escape_char,
@@ -476,4 +482,96 @@ mod tests {
476482
let _ = consumer.consume_scalar_function(&func, &df_schema).await?;
477483
Ok(())
478484
}
485+
486+
#[tokio::test]
487+
async fn test_like_match_conversion() -> Result<()> {
488+
// 1. Setup the consumer with the "like_match" function registered
489+
let mut extensions = Extensions::default();
490+
extensions
491+
.functions
492+
.insert(0, "like_match:str_str".to_string());
493+
extensions
494+
.functions
495+
.insert(1, "like_not_match:str_str".to_string());
496+
extensions
497+
.functions
498+
.insert(2, "like_imatch:str_str".to_string());
499+
500+
let consumer = DefaultSubstraitConsumer::new(&extensions, &TEST_SESSION_STATE);
501+
502+
// 2. Create the arguments (column "a" and pattern "%foo%")
503+
let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]);
504+
let df_schema = DFSchema::try_from(schema).unwrap();
505+
506+
let col_arg = FunctionArgument {
507+
arg_type: Some(ArgType::Value(Expression {
508+
rex_type: Some(RexType::Selection(Box::new(
509+
substrait::proto::expression::FieldReference {
510+
reference_type: Some(substrait::proto::expression::field_reference::ReferenceType::DirectReference(
511+
substrait::proto::expression::ReferenceSegment {
512+
reference_type: Some(substrait::proto::expression::reference_segment::ReferenceType::StructField(
513+
Box::new(substrait::proto::expression::reference_segment::StructField {
514+
field: 0,
515+
child: None,
516+
})
517+
)),
518+
}
519+
)),
520+
root_type: Some(substrait::proto::expression::field_reference::RootType::RootReference(
521+
substrait::proto::expression::field_reference::RootReference {}
522+
)),
523+
}
524+
))),
525+
})),
526+
};
527+
528+
let pattern_arg = FunctionArgument {
529+
arg_type: Some(ArgType::Value(Expression {
530+
rex_type: Some(RexType::Literal(Literal {
531+
nullable: false,
532+
type_variation_reference: 0,
533+
literal_type: Some(LiteralType::String("foo".to_string())),
534+
})),
535+
})),
536+
};
537+
538+
// 3. Test "like_match" (Standard LIKE)
539+
let func_like = ScalarFunction {
540+
function_reference: 0,
541+
arguments: vec![col_arg.clone(), pattern_arg.clone()],
542+
..Default::default()
543+
};
544+
545+
let result = consumer
546+
.consume_scalar_function(&func_like, &df_schema)
547+
.await?;
548+
549+
if let Expr::Like(like) = result {
550+
assert!(!like.negated);
551+
assert!(!like.case_insensitive);
552+
assert_eq!(format!("{}", like.pattern), "Utf8(\"foo\")");
553+
} else {
554+
panic!("Expected Expr::Like, got {result:?}");
555+
}
556+
557+
// 4. Test "like_not_match" (NOT LIKE)
558+
let func_not_like = ScalarFunction {
559+
function_reference: 1,
560+
arguments: vec![col_arg.clone(), pattern_arg.clone()],
561+
..Default::default()
562+
};
563+
564+
let result = consumer
565+
.consume_scalar_function(&func_not_like, &df_schema)
566+
.await?;
567+
568+
if let Expr::Like(like) = result {
569+
assert!(like.negated);
570+
assert!(!like.case_insensitive);
571+
} else {
572+
panic!("Expected Expr::Like (negated), got {result:?}");
573+
}
574+
575+
Ok(())
576+
}
479577
}

0 commit comments

Comments
 (0)