Skip to content

Commit b5a74e7

Browse files
authored
[IO] Add Parquet Dataset Support. (#270)
1 parent d4caaf4 commit b5a74e7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+2926
-104
lines changed

tensorflow/core/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,7 @@ cc_library(
11611161
tf_gen_op_libs(
11621162
is_external = False,
11631163
op_lib_names = [
1164+
"parquet_ops",
11641165
"batch_ops",
11651166
"bitwise_ops",
11661167
"boosted_trees_ops",
@@ -1416,6 +1417,7 @@ cc_library(
14161417
visibility = ["//visibility:public"],
14171418
deps = [
14181419
":array_ops_op_lib",
1420+
":parquet_ops_op_lib",
14191421
":audio_ops_op_lib",
14201422
":batch_ops_op_lib",
14211423
":bitwise_ops_op_lib",
@@ -1620,6 +1622,7 @@ cc_library(
16201622
"//tensorflow/core/kernels:function_ops",
16211623
"//tensorflow/core/kernels:functional_ops",
16221624
"//tensorflow/core/kernels:fused_embedding_ops",
1625+
"//tensorflow/core/kernels/data:parquet_dataset_ops",
16231626
"//tensorflow/core/kernels:grappler",
16241627
"//tensorflow/core/kernels:hash_ops",
16251628
"//tensorflow/core/kernels:histogram_op",
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# API definition entry for the graph op named "ParquetTabularDatasetV1".
# Only the graph op name is given in this entry.
op {
2+
graph_op_name: "ParquetTabularDatasetV1"
3+
}

tensorflow/core/kernels/data/BUILD

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ load(
55
"//tensorflow:tensorflow.bzl",
66
"tf_cc_test",
77
"tf_kernel_library",
8+
"pybind_extension",
89
)
910

1011
package(
@@ -1263,6 +1264,7 @@ tf_kernel_library(
12631264
tf_kernel_library(
12641265
name = "data",
12651266
deps = [
1267+
":parquet_dataset_ops",
12661268
":batch_dataset_op",
12671269
":cache_dataset_ops",
12681270
":concatenate_dataset_op",
@@ -1365,3 +1367,47 @@ tf_kernel_library(
13651367
"//tensorflow/core:lib_internal",
13661368
],
13671369
)
1370+
1371+
# C++ library for the Parquet dataset op: compiles parquet_dataset_ops.cc
# together with the Parquet batch reader (header + implementation, kept
# private in `srcs`) and exports parquet_dataset_ops.h as the public header.
# Depends on the sibling :arrow_util and :dataset_ops targets and on the
# TensorFlow core framework.
# NOTE(review): this is a plain cc_library with no `alwayslink = 1`. If
# parquet_dataset_ops.cc registers ops/kernels through static initializers,
# the linker may drop them from statically linked binaries -- confirm whether
# `alwayslink = 1` (or tf_kernel_library) is needed here.
cc_library(
1372+
name = "parquet_dataset_ops",
1373+
srcs = [
1374+
"parquet_dataset_ops.cc",
1375+
"parquet_batch_reader.h",
1376+
"parquet_batch_reader.cc",
1377+
],
1378+
hdrs = ["parquet_dataset_ops.h"],
1379+
deps = [
1380+
":arrow_util",
1381+
":dataset_ops",
1382+
"//tensorflow/core:framework",
1383+
],
1384+
)
1385+
1386+
# Python extension module `_parquet_pybind`, built from parquet_pybind.cc
# against the sibling :arrow_util library and pybind11.
# `-fexceptions` turns on C++ exception support for this target
# (presumably because the pybind11 bindings rely on exceptions -- confirm).
# The leading "-" in `features` disables the `use_header_modules` toolchain
# feature for this target.
pybind_extension(
1387+
name = "_parquet_pybind",
1388+
srcs = ["parquet_pybind.cc"],
1389+
copts = ["-fexceptions"],
1390+
features = ["-use_header_modules"],
1391+
module_name = "_parquet_pybind",
1392+
deps = [
1393+
":arrow_util",
1394+
"@pybind11",
1395+
],
1396+
)
1397+
1398+
# Shared helper library built from arrow_util.cc (with eigen.h as a private
# header in `srcs`); exports arrow_util.h. Depends on the external @arrow
# repository, the bundled Eigen3, and the TensorFlow core framework.
# It is a dependency of both :parquet_dataset_ops and :_parquet_pybind.
cc_library(
1399+
name = "arrow_util",
1400+
srcs = ["arrow_util.cc",
1401+
"eigen.h"],
1402+
hdrs = ["arrow_util.h"],
1403+
deps = [
1404+
"@arrow",
1405+
"//third_party/eigen3",
1406+
"//tensorflow/core:framework",
1407+
],
# Bazel `defines` are added to the compile line of this target AND of every
# target that depends on it, so these three macros are also visible to
# :parquet_dataset_ops and :_parquet_pybind.
# NOTE(review): presumably these switch on HDFS, S3 and zero-copy support in
# the Arrow helpers -- confirm against arrow_util.cc / arrow_util.h.
1408+
defines = [
1409+
"DEEPREC_ARROW_HDFS",
1410+
"DEEPREC_ARROW_S3",
1411+
"DEEPREC_ARROW_ZEROCOPY",
1412+
]
1413+
)

0 commit comments

Comments
 (0)