@@ -478,108 +478,106 @@ data_aggregation AS (
478478)
479479SELECT * FROM data_aggregation;
480480----
481- EvalScalar
482- ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#26), primary_category (#27), secondary_category (#28)]
483- ├── expressions: [group_item (#23), group_item (#24), group_item (#25)]
484- ├── estimated rows: 0.04
485- └── AggregateFinal
486- ├── output columns: [a.entity_id (#0), a.source_id (#1), type_code (#23), primary_category (#24), secondary_category (#25), event_date (#22)]
487- ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date]
488- ├── aggregate functions: []
481+ Sequence
482+ ├── MaterializedCTE: cte_cse_0
483+ │ └── TableScan
484+ │ ├── table: default.test_virtual_db.data_source_a
485+ │ ├── output columns: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)]
486+ │ ├── read rows: 1
487+ │ ├── read size: < 1 KiB
488+ │ ├── partitions total: 1
489+ │ ├── partitions scanned: 1
490+ │ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
491+ │ ├── push downs: [filters: [], limit: NONE]
492+ │ ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']]
493+ │ └── estimated rows: 1.00
494+ └── EvalScalar
495+ ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#26), primary_category (#27), secondary_category (#28)]
496+ ├── expressions: [group_item (#23), group_item (#24), group_item (#25)]
489497 ├── estimated rows: 0.04
490- └── AggregatePartial
498+ └── AggregateFinal
499+ ├── output columns: [a.entity_id (#0), a.source_id (#1), type_code (#23), primary_category (#24), secondary_category (#25), event_date (#22)]
491500 ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date]
492501 ├── aggregate functions: []
493502 ├── estimated rows: 0.04
494- └── EvalScalar
495- ├── output columns: [a.entity_id (#0), a.source_id (#1), event_date (#22), type_code (#23), primary_category (#24), secondary_category (#25)]
496- ├── expressions: [if(CAST(is_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS Boolean NULL), CAST(assume_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS String NULL), true, 'Unknown', NULL), CAST(a.content_object['category_a'] (#6) AS String NULL), CAST(a.content_object['category_b'] (#7) AS String NULL)]
503+ └── AggregatePartial
504+ │ ├── group by: [entity_id, source_id, type_code, primary_category, secondary_category, event_date]
505+ │ ├── aggregate functions: []
497506 ├── estimated rows: 0.04
498- └── HashJoin
499- ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22)]
500- ├── join type: INNER
501- ├── build keys: [p.entity_id (#9), p.source_id (#10), p.event_date (#22)]
502- ├── probe keys: [a.entity_id (#0), a.source_id (#1), CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)]
503- ├── keys is null equal: [false, false, false]
504- ├── filters: []
505- ├── build join filters:
506- │ ├── filter id:2, build key:p.entity_id (#9), probe key:a.entity_id (#0), filter type:inlist,min_max
507- │ └── filter id:3, build key:p.source_id (#10), probe key:a.source_id (#1), filter type:inlist,min_max
507+ └── EvalScalar
508+ ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22), type_code (#23), primary_category (#24), secondary_category (#25)]
509+ ├── expressions: [if(CAST(is_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS Boolean NULL), CAST(assume_not_null(CAST(a.metadata_object['type'] (#5) AS String NULL)) AS String NULL), true, 'Unknown', NULL), CAST(a.content_object['category_a'] (#6) AS String NULL), CAST(a.content_object['category_b'] (#7) AS String NULL)]
508510 ├── estimated rows: 0.04
509- ├── EvalScalar(Build)
510- │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#22)]
511- │ ├── expressions: [group_item (#21)]
512- │ ├── estimated rows: 0.20
513- │ └── AggregateFinal
514- │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)]
515- │ ├── group by: [entity_id, source_id, event_date]
516- │ ├── aggregate functions: []
517- │ ├── estimated rows: 0.20
518- │ └── AggregatePartial
519- │ ├── group by: [entity_id, source_id, event_date]
520- │ ├── aggregate functions: []
521- │ ├── estimated rows: 0.20
522- │ └── EvalScalar
523- │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)]
524- │ ├── expressions: [CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)]
525- │ ├── estimated rows: 0.20
526- │ └── HashJoin
527- │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10)]
528- │ ├── join type: INNER
529- │ ├── build keys: [a.entity_id (#9), a.source_id (#10)]
530- │ ├── probe keys: [c.entity_id (#18), c.source_id (#19)]
531- │ ├── keys is null equal: [false, false]
532- │ ├── filters: []
533- │ ├── build join filters:
534- │ │ ├── filter id:0, build key:a.entity_id (#9), probe key:c.entity_id (#18), filter type:inlist,min_max
535- │ │ └── filter id:1, build key:a.source_id (#10), probe key:c.source_id (#19), filter type:inlist,min_max
536- │ ├── estimated rows: 0.20
537- │ ├── Filter(Build)
538- │ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), a.content_object['event_date'] (#17)]
539- │ │ ├── filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))]
540- │ │ ├── estimated rows: 0.20
541- │ │ └── TableScan
542- │ │ ├── table: default.test_virtual_db.data_source_a
543- │ │ ├── output columns: [entity_id (#9), source_id (#10), content_object['event_date'] (#17)]
544- │ │ ├── read rows: 1
545- │ │ ├── read size: < 1 KiB
546- │ │ ├── partitions total: 1
547- │ │ ├── partitions scanned: 1
548- │ │ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
549- │ │ ├── push downs: [filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))], limit: NONE]
550- │ │ ├── virtual columns: [content_object['event_date']]
551- │ │ └── estimated rows: 1.00
552- │ └── Filter(Probe)
553- │ ├── output columns: [c.entity_id (#18), c.source_id (#19)]
554- │ ├── filters: [is_true(c.process_mode (#20) = 'standard_mode')]
555- │ ├── estimated rows: 1.00
556- │ └── TableScan
557- │ ├── table: default.test_virtual_db.config_table
558- │ ├── output columns: [entity_id (#18), source_id (#19), process_mode (#20)]
559- │ ├── read rows: 1
560- │ ├── read size: < 1 KiB
561- │ ├── partitions total: 1
562- │ ├── partitions scanned: 1
563- │ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1, bloom pruning: 1 to 1>]
564- │ ├── push downs: [filters: [is_true(config_table.process_mode (#20) = 'standard_mode')], limit: NONE]
565- │ ├── apply join filters: [#0, #1]
566- │ └── estimated rows: 1.00
567- └── Filter(Probe)
568- ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), a.content_object['event_date'] (#8)]
569- ├── filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))]
570- ├── estimated rows: 0.20
571- └── TableScan
572- ├── table: default.test_virtual_db.data_source_a
573- ├── output columns: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)]
574- ├── read rows: 1
575- ├── read size: < 1 KiB
576- ├── partitions total: 1
577- ├── partitions scanned: 1
578- ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
579- ├── push downs: [filters: [and_filters(and_filters(is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL))), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)))], limit: NONE]
580- ├── apply join filters: [#2, #3]
581- ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']]
582- └── estimated rows: 1.00
511+ └── HashJoin
512+ ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), event_date (#22)]
513+ ├── join type: INNER
514+ ├── build keys: [p.entity_id (#9), p.source_id (#10), p.event_date (#22)]
515+ ├── probe keys: [a.entity_id (#0), a.source_id (#1), CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)]
516+ ├── keys is null equal: [false, false, false]
517+ ├── filters: []
518+ ├── build join filters:
519+ │ ├── filter id:2, build key:p.entity_id (#9), probe key:a.entity_id (#0), filter type:inlist,min_max
520+ │ └── filter id:3, build key:p.source_id (#10), probe key:a.source_id (#1), filter type:inlist,min_max
521+ ├── estimated rows: 0.04
522+ ├── EvalScalar(Build)
523+ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#22)]
524+ │ ├── expressions: [group_item (#21)]
525+ │ ├── estimated rows: 0.20
526+ │ └── AggregateFinal
527+ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), event_date (#21)]
528+ │ ├── group by: [entity_id, source_id, event_date]
529+ │ ├── aggregate functions: []
530+ │ ├── estimated rows: 0.20
531+ │ └── AggregatePartial
532+ │ ├── group by: [entity_id, source_id, event_date]
533+ │ ├── aggregate functions: []
534+ │ ├── estimated rows: 0.20
535+ │ └── EvalScalar
536+ │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10), event_date (#21)]
537+ │ ├── expressions: [CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)]
538+ │ ├── estimated rows: 0.20
539+ │ └── HashJoin
540+ │ ├── output columns: [a.content_object['event_date'] (#17), a.entity_id (#9), a.source_id (#10)]
541+ │ ├── join type: INNER
542+ │ ├── build keys: [a.entity_id (#9), a.source_id (#10)]
543+ │ ├── probe keys: [c.entity_id (#18), c.source_id (#19)]
544+ │ ├── keys is null equal: [false, false]
545+ │ ├── filters: []
546+ │ ├── build join filters:
547+ │ │ ├── filter id:0, build key:a.entity_id (#9), probe key:c.entity_id (#18), filter type:inlist,min_max
548+ │ │ └── filter id:1, build key:a.source_id (#10), probe key:c.source_id (#19), filter type:inlist,min_max
549+ │ ├── estimated rows: 0.20
550+ │ ├── Filter(Build)
551+ │ │ ├── output columns: [a.entity_id (#9), a.source_id (#10), a.content_object['event_date'] (#17)]
552+ │ │ ├── filters: [is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#17) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))]
553+ │ │ ├── estimated rows: 0.20
554+ │ │ └── MaterializeCTERef
555+ │ │ ├── cte_name: cte_cse_0
556+ │ │ ├── cte_schema: [entity_id (#9), source_id (#10), metadata_object['type'] (#14), content_object['category_a'] (#15), content_object['category_b'] (#16), content_object['event_date'] (#17)]
557+ │ │ └── estimated rows: 1.00
558+ │ └── Filter(Probe)
559+ │ ├── output columns: [c.entity_id (#18), c.source_id (#19)]
560+ │ ├── filters: [is_true(c.process_mode (#20) = 'standard_mode')]
561+ │ ├── estimated rows: 1.00
562+ │ └── TableScan
563+ │ ├── table: default.test_virtual_db.config_table
564+ │ ├── output columns: [entity_id (#18), source_id (#19), process_mode (#20)]
565+ │ ├── read rows: 1
566+ │ ├── read size: < 1 KiB
567+ │ ├── partitions total: 1
568+ │ ├── partitions scanned: 1
569+ │ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1, bloom pruning: 1 to 1>]
570+ │ ├── push downs: [filters: [is_true(config_table.process_mode (#20) = 'standard_mode')], limit: NONE]
571+ │ ├── apply join filters: [#0, #1]
572+ │ └── estimated rows: 1.00
573+ └── Filter(Probe)
574+ ├── output columns: [a.entity_id (#0), a.source_id (#1), a.metadata_object['type'] (#5), a.content_object['category_a'] (#6), a.content_object['category_b'] (#7), a.content_object['event_date'] (#8)]
575+ ├── filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))]
576+ ├── estimated rows: 0.20
577+ └── MaterializeCTERef
578+ ├── cte_name: cte_cse_0
579+ ├── cte_schema: [entity_id (#0), source_id (#1), metadata_object['type'] (#5), content_object['category_a'] (#6), content_object['category_b'] (#7), content_object['event_date'] (#8)]
580+ └── estimated rows: 1.00
583581
584582query TTTTTT
585583WITH processed_dates AS (
0 commit comments