Skip to content

Commit 1b051b9

Browse files
committed
feat: support ListView and LargeListView in ScalarValue
1 parent 6ed22bb commit 1b051b9

File tree

11 files changed

+288
-33
lines changed

11 files changed

+288
-33
lines changed

datafusion/common/src/scalar/mod.rs

Lines changed: 134 additions & 20 deletions
Large diffs are not rendered by default.

datafusion/common/src/utils/mod.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ use arrow::array::{
2929
cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
3030
OffsetSizeTrait,
3131
};
32-
use arrow::buffer::OffsetBuffer;
32+
use arrow::array::{LargeListViewArray, ListViewArray};
33+
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
3334
use arrow::compute::{partition, SortColumn, SortOptions};
3435
use arrow::datatypes::{DataType, Field, SchemaRef};
3536
#[cfg(feature = "sql")]
@@ -479,6 +480,32 @@ impl SingleRowListArrayBuilder {
479480
ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
480481
}
481482

483+
/// Build a single element [`ListViewArray`]
484+
pub fn build_list_view_array(self) -> ListViewArray {
485+
let (field, arr) = self.into_field_and_arr();
486+
let offsets = ScalarBuffer::from(vec![0]);
487+
let sizes = ScalarBuffer::from(vec![arr.len() as i32]);
488+
ListViewArray::new(field, offsets, sizes, arr, None)
489+
}
490+
491+
/// Build a single element [`ListViewArray`] and wrap as [`ScalarValue::ListView`]
492+
pub fn build_list_view_scalar(self) -> ScalarValue {
493+
ScalarValue::ListView(Arc::new(self.build_list_view_array()))
494+
}
495+
496+
/// Build a single element [`LargeListViewArray`]
497+
pub fn build_large_list_view_array(self) -> LargeListViewArray {
498+
let (field, arr) = self.into_field_and_arr();
499+
let offsets = ScalarBuffer::from(vec![0]);
500+
let sizes = ScalarBuffer::from(vec![arr.len() as i64]);
501+
LargeListViewArray::new(field, offsets, sizes, arr, None)
502+
}
503+
504+
/// Build a single element [`LargeListViewArray`] and wrap as [`ScalarValue::LargeListView`]
505+
pub fn build_large_list_view_scalar(self) -> ScalarValue {
506+
ScalarValue::LargeListView(Arc::new(self.build_large_list_view_array()))
507+
}
508+
482509
/// Helper function: convert this builder into a tuple of field and array
483510
fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
484511
let Self {

datafusion/expr-common/src/signature.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -456,14 +456,14 @@ impl TypeSignatureClass {
456456

457457
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
458458
pub enum ArrayFunctionSignature {
459-
/// A function takes at least one List/LargeList/FixedSizeList argument.
459+
/// A function takes at least one List/LargeList/FixedSizeList/ListView/LargeListView argument.
460460
Array {
461461
/// A full list of the arguments accepted by this function.
462462
arguments: Vec<ArrayFunctionArgument>,
463463
/// Additional information about how array arguments should be coerced.
464464
array_coercion: Option<ListCoercion>,
465465
},
466-
/// A function takes a single argument that must be a List/LargeList/FixedSizeList
466+
/// A function takes a single argument that must be a List/LargeList/FixedSizeList/ListView/LargeListView
467467
/// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
468468
RecursiveArray,
469469
/// Specialized Signature for MapArray
@@ -500,8 +500,8 @@ pub enum ArrayFunctionArgument {
500500
Element,
501501
/// An Int64 index argument.
502502
Index,
503-
/// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
504-
/// to the same type.
503+
/// An argument of type List/LargeList/FixedSizeList/ListView/LargeListView.
504+
/// All Array arguments must be coercible to the same type.
505505
Array,
506506
// A Utf8 argument.
507507
String,

datafusion/proto-common/proto/datafusion_common.proto

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ message Union{
194194
repeated int32 type_ids = 3;
195195
}
196196

197-
// Used for List/FixedSizeList/LargeList/Struct/Map
197+
// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map
198198
message ScalarNestedValue {
199199
message Dictionary {
200200
bytes ipc_message = 1;
@@ -295,6 +295,8 @@ message ScalarValue{
295295
ScalarNestedValue large_list_value = 16;
296296
ScalarNestedValue list_value = 17;
297297
ScalarNestedValue fixed_size_list_value = 18;
298+
ScalarNestedValue list_view_value = 45;
299+
ScalarNestedValue large_list_view_value = 46;
298300
ScalarNestedValue struct_value = 32;
299301
ScalarNestedValue map_value = 41;
300302

@@ -385,6 +387,8 @@ message ArrowType{
385387
List LIST = 25;
386388
List LARGE_LIST = 26;
387389
FixedSizeList FIXED_SIZE_LIST = 27;
390+
List LIST_VIEW = 42;
391+
List LARGE_LIST_VIEW = 43;
388392
Struct STRUCT = 28;
389393
Union UNION = 29;
390394
Dictionary DICTIONARY = 30;

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType {
292292
let list_size = list.list_size;
293293
DataType::FixedSizeList(Arc::new(list_type), list_size)
294294
}
295+
arrow_type::ArrowTypeEnum::ListView(list) => {
296+
let list_type =
297+
list.as_ref().field_type.as_deref().required("field_type")?;
298+
DataType::ListView(Arc::new(list_type))
299+
}
300+
arrow_type::ArrowTypeEnum::LargeListView(list) => {
301+
let list_type =
302+
list.as_ref().field_type.as_deref().required("field_type")?;
303+
DataType::LargeListView(Arc::new(list_type))
304+
}
295305
arrow_type::ArrowTypeEnum::Struct(strct) => DataType::Struct(
296306
parse_proto_fields_to_fields(&strct.sub_field_types)?.into(),
297307
),
@@ -385,6 +395,8 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
385395
Value::ListValue(v)
386396
| Value::FixedSizeListValue(v)
387397
| Value::LargeListValue(v)
398+
| Value::ListViewValue(v)
399+
| Value::LargeListViewValue(v)
388400
| Value::StructValue(v)
389401
| Value::MapValue(v) => {
390402
let protobuf::ScalarNestedValue {
@@ -469,6 +481,12 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
469481
Value::FixedSizeListValue(_) => {
470482
Self::FixedSizeList(arr.as_fixed_size_list().to_owned().into())
471483
}
484+
Value::ListViewValue(_) => {
485+
Self::ListView(arr.as_list_view::<i32>().to_owned().into())
486+
}
487+
Value::LargeListViewValue(_) => {
488+
Self::LargeListView(arr.as_list_view::<i64>().to_owned().into())
489+
}
472490
Value::StructValue(_) => {
473491
Self::Struct(arr.as_struct().to_owned().into())
474492
}

datafusion/proto-common/src/generated/pbjson.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ impl serde::Serialize for ArrowType {
264264
arrow_type::ArrowTypeEnum::FixedSizeList(v) => {
265265
struct_ser.serialize_field("FIXEDSIZELIST", v)?;
266266
}
267+
arrow_type::ArrowTypeEnum::ListView(v) => {
268+
struct_ser.serialize_field("LISTVIEW", v)?;
269+
}
270+
arrow_type::ArrowTypeEnum::LargeListView(v) => {
271+
struct_ser.serialize_field("LARGELISTVIEW", v)?;
272+
}
267273
arrow_type::ArrowTypeEnum::Struct(v) => {
268274
struct_ser.serialize_field("STRUCT", v)?;
269275
}
@@ -329,6 +335,10 @@ impl<'de> serde::Deserialize<'de> for ArrowType {
329335
"LARGELIST",
330336
"FIXED_SIZE_LIST",
331337
"FIXEDSIZELIST",
338+
"LIST_VIEW",
339+
"LISTVIEW",
340+
"LARGE_LIST_VIEW",
341+
"LARGELISTVIEW",
332342
"STRUCT",
333343
"UNION",
334344
"DICTIONARY",
@@ -371,6 +381,8 @@ impl<'de> serde::Deserialize<'de> for ArrowType {
371381
List,
372382
LargeList,
373383
FixedSizeList,
384+
ListView,
385+
LargeListView,
374386
Struct,
375387
Union,
376388
Dictionary,
@@ -430,6 +442,8 @@ impl<'de> serde::Deserialize<'de> for ArrowType {
430442
"LIST" => Ok(GeneratedField::List),
431443
"LARGELIST" | "LARGE_LIST" => Ok(GeneratedField::LargeList),
432444
"FIXEDSIZELIST" | "FIXED_SIZE_LIST" => Ok(GeneratedField::FixedSizeList),
445+
"LISTVIEW" | "LIST_VIEW" => Ok(GeneratedField::ListView),
446+
"LARGELISTVIEW" | "LARGE_LIST_VIEW" => Ok(GeneratedField::LargeListView),
433447
"STRUCT" => Ok(GeneratedField::Struct),
434448
"UNION" => Ok(GeneratedField::Union),
435449
"DICTIONARY" => Ok(GeneratedField::Dictionary),
@@ -687,6 +701,20 @@ impl<'de> serde::Deserialize<'de> for ArrowType {
687701
return Err(serde::de::Error::duplicate_field("FIXEDSIZELIST"));
688702
}
689703
arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::FixedSizeList)
704+
;
705+
}
706+
GeneratedField::ListView => {
707+
if arrow_type_enum__.is_some() {
708+
return Err(serde::de::Error::duplicate_field("LISTVIEW"));
709+
}
710+
arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::ListView)
711+
;
712+
}
713+
GeneratedField::LargeListView => {
714+
if arrow_type_enum__.is_some() {
715+
return Err(serde::de::Error::duplicate_field("LARGELISTVIEW"));
716+
}
717+
arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::LargeListView)
690718
;
691719
}
692720
GeneratedField::Struct => {
@@ -7477,6 +7505,12 @@ impl serde::Serialize for ScalarValue {
74777505
scalar_value::Value::FixedSizeListValue(v) => {
74787506
struct_ser.serialize_field("fixedSizeListValue", v)?;
74797507
}
7508+
scalar_value::Value::ListViewValue(v) => {
7509+
struct_ser.serialize_field("listViewValue", v)?;
7510+
}
7511+
scalar_value::Value::LargeListViewValue(v) => {
7512+
struct_ser.serialize_field("largeListViewValue", v)?;
7513+
}
74807514
scalar_value::Value::StructValue(v) => {
74817515
struct_ser.serialize_field("structValue", v)?;
74827516
}
@@ -7611,6 +7645,10 @@ impl<'de> serde::Deserialize<'de> for ScalarValue {
76117645
"listValue",
76127646
"fixed_size_list_value",
76137647
"fixedSizeListValue",
7648+
"list_view_value",
7649+
"listViewValue",
7650+
"large_list_view_value",
7651+
"largeListViewValue",
76147652
"struct_value",
76157653
"structValue",
76167654
"map_value",
@@ -7679,6 +7717,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue {
76797717
LargeListValue,
76807718
ListValue,
76817719
FixedSizeListValue,
7720+
ListViewValue,
7721+
LargeListViewValue,
76827722
StructValue,
76837723
MapValue,
76847724
Decimal32Value,
@@ -7742,6 +7782,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue {
77427782
"largeListValue" | "large_list_value" => Ok(GeneratedField::LargeListValue),
77437783
"listValue" | "list_value" => Ok(GeneratedField::ListValue),
77447784
"fixedSizeListValue" | "fixed_size_list_value" => Ok(GeneratedField::FixedSizeListValue),
7785+
"listViewValue" | "list_view_value" => Ok(GeneratedField::ListViewValue),
7786+
"largeListViewValue" | "large_list_view_value" => Ok(GeneratedField::LargeListViewValue),
77457787
"structValue" | "struct_value" => Ok(GeneratedField::StructValue),
77467788
"mapValue" | "map_value" => Ok(GeneratedField::MapValue),
77477789
"decimal32Value" | "decimal32_value" => Ok(GeneratedField::Decimal32Value),
@@ -7909,6 +7951,20 @@ impl<'de> serde::Deserialize<'de> for ScalarValue {
79097951
return Err(serde::de::Error::duplicate_field("fixedSizeListValue"));
79107952
}
79117953
value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::FixedSizeListValue)
7954+
;
7955+
}
7956+
GeneratedField::ListViewValue => {
7957+
if value__.is_some() {
7958+
return Err(serde::de::Error::duplicate_field("listViewValue"));
7959+
}
7960+
value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::ListViewValue)
7961+
;
7962+
}
7963+
GeneratedField::LargeListViewValue => {
7964+
if value__.is_some() {
7965+
return Err(serde::de::Error::duplicate_field("largeListViewValue"));
7966+
}
7967+
value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::LargeListViewValue)
79127968
;
79137969
}
79147970
GeneratedField::StructValue => {

datafusion/proto-common/src/generated/prost.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ pub struct Union {
184184
#[prost(int32, repeated, tag = "3")]
185185
pub type_ids: ::prost::alloc::vec::Vec<i32>,
186186
}
187-
/// Used for List/FixedSizeList/LargeList/Struct/Map
187+
/// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map
188188
#[derive(Clone, PartialEq, ::prost::Message)]
189189
pub struct ScalarNestedValue {
190190
#[prost(bytes = "vec", tag = "1")]
@@ -311,7 +311,7 @@ pub struct ScalarFixedSizeBinary {
311311
pub struct ScalarValue {
312312
#[prost(
313313
oneof = "scalar_value::Value",
314-
tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42"
314+
tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 45, 46, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42"
315315
)]
316316
pub value: ::core::option::Option<scalar_value::Value>,
317317
}
@@ -362,6 +362,10 @@ pub mod scalar_value {
362362
ListValue(super::ScalarNestedValue),
363363
#[prost(message, tag = "18")]
364364
FixedSizeListValue(super::ScalarNestedValue),
365+
#[prost(message, tag = "45")]
366+
ListViewValue(super::ScalarNestedValue),
367+
#[prost(message, tag = "46")]
368+
LargeListViewValue(super::ScalarNestedValue),
365369
#[prost(message, tag = "32")]
366370
StructValue(super::ScalarNestedValue),
367371
#[prost(message, tag = "41")]
@@ -449,7 +453,7 @@ pub struct Decimal256 {
449453
pub struct ArrowType {
450454
#[prost(
451455
oneof = "arrow_type::ArrowTypeEnum",
452-
tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 28, 29, 30, 33"
456+
tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 42, 43, 28, 29, 30, 33"
453457
)]
454458
pub arrow_type_enum: ::core::option::Option<arrow_type::ArrowTypeEnum>,
455459
}
@@ -530,6 +534,10 @@ pub mod arrow_type {
530534
LargeList(::prost::alloc::boxed::Box<super::List>),
531535
#[prost(message, tag = "27")]
532536
FixedSizeList(::prost::alloc::boxed::Box<super::FixedSizeList>),
537+
#[prost(message, tag = "42")]
538+
ListView(::prost::alloc::boxed::Box<super::List>),
539+
#[prost(message, tag = "43")]
540+
LargeListView(::prost::alloc::boxed::Box<super::List>),
533541
#[prost(message, tag = "28")]
534542
Struct(super::Struct),
535543
#[prost(message, tag = "29")]

datafusion/proto-common/src/to_proto/mod.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,12 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue {
374374
ScalarValue::FixedSizeList(arr) => {
375375
encode_scalar_nested_value(arr.to_owned() as ArrayRef, val)
376376
}
377+
ScalarValue::ListView(arr) => {
378+
encode_scalar_nested_value(arr.to_owned() as ArrayRef, val)
379+
}
380+
ScalarValue::LargeListView(arr) => {
381+
encode_scalar_nested_value(arr.to_owned() as ArrayRef, val)
382+
}
377383
ScalarValue::Struct(arr) => {
378384
encode_scalar_nested_value(arr.to_owned() as ArrayRef, val)
379385
}
@@ -1006,7 +1012,7 @@ fn create_proto_scalar<I, T: FnOnce(&I) -> protobuf::scalar_value::Value>(
10061012
Ok(protobuf::ScalarValue { value: Some(value) })
10071013
}
10081014

1009-
// ScalarValue::List / FixedSizeList / LargeList / Struct / Map are serialized using
1015+
// ScalarValue::List / FixedSizeList / LargeList / ListView / LargeListView / Struct / Map are serialized using
10101016
// Arrow IPC messages as a single column RecordBatch
10111017
fn encode_scalar_nested_value(
10121018
arr: ArrayRef,
@@ -1062,6 +1068,16 @@ fn encode_scalar_nested_value(
10621068
scalar_list_value,
10631069
)),
10641070
}),
1071+
ScalarValue::ListView(_) => Ok(protobuf::ScalarValue {
1072+
value: Some(protobuf::scalar_value::Value::ListViewValue(
1073+
scalar_list_value,
1074+
)),
1075+
}),
1076+
ScalarValue::LargeListView(_) => Ok(protobuf::ScalarValue {
1077+
value: Some(protobuf::scalar_value::Value::LargeListViewValue(
1078+
scalar_list_value,
1079+
)),
1080+
}),
10651081
ScalarValue::Struct(_) => Ok(protobuf::ScalarValue {
10661082
value: Some(protobuf::scalar_value::Value::StructValue(
10671083
scalar_list_value,

datafusion/proto/src/generated/datafusion_proto_common.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ pub struct Union {
184184
#[prost(int32, repeated, tag = "3")]
185185
pub type_ids: ::prost::alloc::vec::Vec<i32>,
186186
}
187-
/// Used for List/FixedSizeList/LargeList/Struct/Map
187+
/// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map
188188
#[derive(Clone, PartialEq, ::prost::Message)]
189189
pub struct ScalarNestedValue {
190190
#[prost(bytes = "vec", tag = "1")]
@@ -311,7 +311,7 @@ pub struct ScalarFixedSizeBinary {
311311
pub struct ScalarValue {
312312
#[prost(
313313
oneof = "scalar_value::Value",
314-
tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42"
314+
tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 45, 46, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42"
315315
)]
316316
pub value: ::core::option::Option<scalar_value::Value>,
317317
}
@@ -362,6 +362,10 @@ pub mod scalar_value {
362362
ListValue(super::ScalarNestedValue),
363363
#[prost(message, tag = "18")]
364364
FixedSizeListValue(super::ScalarNestedValue),
365+
#[prost(message, tag = "45")]
366+
ListViewValue(super::ScalarNestedValue),
367+
#[prost(message, tag = "46")]
368+
LargeListViewValue(super::ScalarNestedValue),
365369
#[prost(message, tag = "32")]
366370
StructValue(super::ScalarNestedValue),
367371
#[prost(message, tag = "41")]
@@ -449,7 +453,7 @@ pub struct Decimal256 {
449453
pub struct ArrowType {
450454
#[prost(
451455
oneof = "arrow_type::ArrowTypeEnum",
452-
tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 28, 29, 30, 33"
456+
tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 42, 43, 28, 29, 30, 33"
453457
)]
454458
pub arrow_type_enum: ::core::option::Option<arrow_type::ArrowTypeEnum>,
455459
}
@@ -530,6 +534,10 @@ pub mod arrow_type {
530534
LargeList(::prost::alloc::boxed::Box<super::List>),
531535
#[prost(message, tag = "27")]
532536
FixedSizeList(::prost::alloc::boxed::Box<super::FixedSizeList>),
537+
#[prost(message, tag = "42")]
538+
ListView(::prost::alloc::boxed::Box<super::List>),
539+
#[prost(message, tag = "43")]
540+
LargeListView(::prost::alloc::boxed::Box<super::List>),
533541
#[prost(message, tag = "28")]
534542
Struct(super::Struct),
535543
#[prost(message, tag = "29")]

0 commit comments

Comments
 (0)