From 124d3ea2f0181b64fa3a0cc511201fc1a07fc067 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 12 Jan 2025 15:06:01 +0800 Subject: [PATCH 1/9] fix: type checking --- python/datafusion/__init__.py | 2 +- python/datafusion/_internal/__init__.pyi | 468 ++++++++++++++++++ python/datafusion/_internal/common.pyi | 245 +++++++++ python/datafusion/_internal/expr/__init__.pyi | 43 ++ .../datafusion/_internal/expr/aggregate.pyi | 20 + .../_internal/expr/aggregate_expr.pyi | 10 + python/datafusion/_internal/expr/alias.pyi | 6 + python/datafusion/_internal/expr/analyze.pyi | 11 + python/datafusion/_internal/expr/base.pyi | 108 ++++ python/datafusion/_internal/expr/between.pyi | 11 + .../datafusion/_internal/expr/binary_expr.pyi | 10 + .../datafusion/_internal/expr/bool_expr.pyi | 32 ++ python/datafusion/_internal/expr/case.pyi | 10 + python/datafusion/_internal/expr/cast.pyi | 14 + python/datafusion/_internal/expr/column.pyi | 9 + .../_internal/expr/conditional_expr.pyi | 10 + .../_internal/expr/create_memory_table.pyi | 10 + .../datafusion/_internal/expr/create_view.pyi | 11 + python/datafusion/_internal/expr/distinct.pyi | 9 + .../datafusion/_internal/expr/drop_table.pyi | 12 + .../_internal/expr/empty_relation.pyi | 10 + python/datafusion/_internal/expr/exists.pyi | 7 + python/datafusion/_internal/expr/explain.pyi | 16 + .../datafusion/_internal/expr/extension.pyi | 2 + python/datafusion/_internal/expr/filter.pyi | 12 + .../_internal/expr/grouping_set.pyi | 2 + python/datafusion/_internal/expr/in_list.pyi | 9 + .../datafusion/_internal/expr/in_subquery.pyi | 11 + python/datafusion/_internal/expr/join.pyi | 32 ++ python/datafusion/_internal/expr/like.pyi | 31 ++ python/datafusion/_internal/expr/limit.pyi | 9 + python/datafusion/_internal/expr/literal.pyi | 44 ++ .../datafusion/_internal/expr/placeholder.pyi | 6 + .../datafusion/_internal/expr/projection.pyi | 15 + .../datafusion/_internal/expr/repartition.pyi | 14 + .../_internal/expr/scalar_subquery.pyi | 5 + .../_internal/expr/scalar_variable.pyi | 10 + python/datafusion/_internal/expr/sort.pyi | 15 + .../datafusion/_internal/expr/sort_expr.pyi | 16 + python/datafusion/_internal/expr/subquery.pyi | 9 + .../_internal/expr/subquery_alias.pyi | 13 + .../datafusion/_internal/expr/table_scan.pyi | 18 + python/datafusion/_internal/expr/union.pyi | 11 + python/datafusion/_internal/expr/unnest.pyi | 11 + .../datafusion/_internal/expr/unnest_expr.pyi | 7 + python/datafusion/_internal/expr/window.pyi | 48 ++ .../_internal/functions/__init__.pyi | 376 ++++++++++++++ .../_internal/object_store/__init__.pyi | 43 ++ .../_internal/substrait/__init__.pyi | 37 ++ python/datafusion/catalog.py | 2 +- python/datafusion/context.py | 37 +- python/datafusion/dataframe.py | 23 +- python/datafusion/expr.py | 32 +- python/datafusion/functions.py | 122 ++--- python/datafusion/input/location.py | 10 +- python/datafusion/udf.py | 18 +- 56 files changed, 2018 insertions(+), 116 deletions(-) create mode 100644 python/datafusion/_internal/__init__.pyi create mode 100644 python/datafusion/_internal/common.pyi create mode 100644 python/datafusion/_internal/expr/__init__.pyi create mode 100644 python/datafusion/_internal/expr/aggregate.pyi create mode 100644 python/datafusion/_internal/expr/aggregate_expr.pyi create mode 100644 python/datafusion/_internal/expr/alias.pyi create mode 100644 python/datafusion/_internal/expr/analyze.pyi create mode 100644 python/datafusion/_internal/expr/base.pyi create mode 100644 python/datafusion/_internal/expr/between.pyi create mode 100644 python/datafusion/_internal/expr/binary_expr.pyi create mode 100644 python/datafusion/_internal/expr/bool_expr.pyi create mode 100644 python/datafusion/_internal/expr/case.pyi create mode 100644 python/datafusion/_internal/expr/cast.pyi create mode 100644 python/datafusion/_internal/expr/column.pyi create mode 100644 python/datafusion/_internal/expr/conditional_expr.pyi create mode 100644 python/datafusion/_internal/expr/create_memory_table.pyi create mode 100644 python/datafusion/_internal/expr/create_view.pyi create mode 100644 python/datafusion/_internal/expr/distinct.pyi create mode 100644 python/datafusion/_internal/expr/drop_table.pyi create mode 100644 python/datafusion/_internal/expr/empty_relation.pyi create mode 100644 python/datafusion/_internal/expr/exists.pyi create mode 100644 python/datafusion/_internal/expr/explain.pyi create mode 100644 python/datafusion/_internal/expr/extension.pyi create mode 100644 python/datafusion/_internal/expr/filter.pyi create mode 100644 python/datafusion/_internal/expr/grouping_set.pyi create mode 100644 python/datafusion/_internal/expr/in_list.pyi create mode 100644 python/datafusion/_internal/expr/in_subquery.pyi create mode 100644 python/datafusion/_internal/expr/join.pyi create mode 100644 python/datafusion/_internal/expr/like.pyi create mode 100644 python/datafusion/_internal/expr/limit.pyi create mode 100644 python/datafusion/_internal/expr/literal.pyi create mode 100644 python/datafusion/_internal/expr/placeholder.pyi create mode 100644 python/datafusion/_internal/expr/projection.pyi create mode 100644 python/datafusion/_internal/expr/repartition.pyi create mode 100644 python/datafusion/_internal/expr/scalar_subquery.pyi create mode 100644 python/datafusion/_internal/expr/scalar_variable.pyi create mode 100644 python/datafusion/_internal/expr/sort.pyi create mode 100644 python/datafusion/_internal/expr/sort_expr.pyi create mode 100644 python/datafusion/_internal/expr/subquery.pyi create mode 100644 python/datafusion/_internal/expr/subquery_alias.pyi create mode 100644 python/datafusion/_internal/expr/table_scan.pyi create mode 100644 python/datafusion/_internal/expr/union.pyi create mode 100644 python/datafusion/_internal/expr/unnest.pyi create mode 100644 python/datafusion/_internal/expr/unnest_expr.pyi create mode 100644 python/datafusion/_internal/expr/window.pyi create mode 100644 python/datafusion/_internal/functions/__init__.pyi create mode 100644 python/datafusion/_internal/object_store/__init__.pyi create mode 100644 python/datafusion/_internal/substrait/__init__.pyi diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 2d8db42c8..c73d424dd 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -24,7 +24,7 @@ try: import importlib.metadata as importlib_metadata except ImportError: - import importlib_metadata + import importlib_metadata # type: ignore from .context import ( SessionContext, diff --git a/python/datafusion/_internal/__init__.pyi b/python/datafusion/_internal/__init__.pyi new file mode 100644 index 000000000..2a47b0d23 --- /dev/null +++ b/python/datafusion/_internal/__init__.pyi @@ -0,0 +1,468 @@ +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Set, Tuple +import pyarrow as pa +from pyarrow.dataset import Dataset +import pandas as pd +import polars as pl +from ..udf import Accumulator, WindowEvaluator +from ..context import ArrowStreamExportable, TableProviderExportable +from .expr import SortExpr, Expr + +class Catalog: + def names(self) -> List[str]: ... + + def database(self, name: str = "public") -> Database: ... + + +class Database: + def names(self) -> Set[str]: ... + + def table(self, name: str) -> Table: ... + +class Table: + @property + def schema(self) -> pa.Schema: + ... + + @property + def kind(self) -> str: + ... + +class SessionConfig: + def __init__(self, config_options: Optional[Dict[str, str]] = None) -> None: ... + + def with_create_default_catalog_and_schema(self, enabled: bool) -> SessionConfig: ... + + def with_default_catalog_and_schema(self, catalog: str, schema: str) -> SessionConfig: ... + + def with_information_schema(self, enabled: bool) -> SessionConfig: ... + + def with_batch_size(self, batch_size: int) -> SessionConfig: ... + + def with_target_partitions(self, target_partitions: int) -> SessionConfig: ... + + def with_repartition_aggregations(self, enabled: bool) -> SessionConfig: ... + + def with_repartition_joins(self, enabled: bool) -> SessionConfig: ... + + def with_repartition_windows(self, enabled: bool) -> SessionConfig: ... + + def with_repartition_sorts(self, enabled: bool) -> SessionConfig: ... + + def with_repartition_file_scans(self, enabled: bool) -> SessionConfig: ... + + def with_repartition_file_min_size(self, size: int) -> SessionConfig: ... + + def with_parquet_pruning(self, enabled: bool) -> SessionConfig: ... + + def set(self, key: str, value: str) -> SessionConfig: ... + + +class RuntimeEnvBuilder: + def __init__(self) -> None: ... + + def with_disk_manager_disabled(self) -> RuntimeEnvBuilder: ... + + def with_disk_manager_os(self) -> RuntimeEnvBuilder: ... + + def with_disk_manager_specified(self, paths: List[str]) -> RuntimeEnvBuilder: ... + + def with_unbounded_memory_pool(self) -> RuntimeEnvBuilder: ... + + def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder: ... + + def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder: ... + + def with_temp_file_path(self, path: str) -> RuntimeEnvBuilder: ... + + +class SQLOptions: + def __init__(self) -> None: ... + + def with_allow_ddl(self, allow: bool) -> SQLOptions: ... + + def with_allow_dml(self, allow: bool) -> SQLOptions: ... + + def with_allow_statements(self, allow: bool) -> SQLOptions: ... + + +class SessionContext: + def __init__(self, config: Optional[SessionConfig] = None, runtime: Optional[RuntimeEnvBuilder] = None) -> None: ... + + def enable_url_table(self) -> SessionContext: ... + + def register_object_store(self, schema: str, storage: Any, host: Optional[str] = None): ... + + def register_listing_table(self, name: str, path: str, table_partition_cols: List[Tuple[str, str]] = [], file_extension: str = ".parquet", schema: Optional[pa.Schema] = None, file_sort_order: Optional[List[List[SortExpr]]] = None): ... + + def sql(self, query: str) -> DataFrame: ... + + def sql_with_options(self, query: str, options: Optional[SQLOptions] = None) -> DataFrame: ... + + def create_dataframe(self, partitions: List[List[pa.RecordBatch]], name: Optional[str] = None, schema: Optional[pa.Schema] = None) -> DataFrame: ... + + def create_dataframe_from_logical_plan(self, plan: LogicalPlan) -> DataFrame: ... + + def from_pylist(self, data: list, name: Optional[str] = None) -> DataFrame: ... + + def from_pydict(self, data: dict, name: Optional[str] = None) -> DataFrame: ... + + def from_arrow(self, data: ArrowStreamExportable | pa.RecordBatchReader, name: Optional[str] = None) -> DataFrame: ... + + def from_pandas(self, data: pd.DataFrame, name: Optional[str] = None) -> DataFrame: ... + + def from_polars(self, data: pl.DataFrame, name: Optional[str] = None) -> DataFrame: ... + + def register_table(self, data: str, table: Table): ... + + def deregister_table(self, name: str): ... + + def register_table_provider(self, name: str, provider: TableProviderExportable): ... + + def register_record_batches(self, name: str, partitions: List[List[pa.RecordBatch]]): ... + + def register_parquet( + self, + name: str, + path: str, + table_partition_cols: List[Tuple[str, str]] = [], + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: Optional[pa.Schema] = None, + file_sort_order: Optional[List[List[SortExpr]]] = None, + **kwargs + ): + ... + + def register_csv( + self, + name: str, + path: str | List[str], + schema: Optional[pa.Schema] = None, + has_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + file_compression_type: Optional[str] = None, + **kwargs + ): + ... + + def register_json( + self, + name: str, + path: str | Path, + schema: Optional[pa.Schema] = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: List[Tuple[str, str]] = [], + file_compression_type: Optional[str] = None, + **kwargs + ): + ... + + def register_avro( + self, + name: str, + path: str | Path, + schema: Optional[pa.Schema] = None, + file_extension: str = ".avro", + table_partition_cols: List[Tuple[str, str]] = [], + ): + ... + + def register_dataset( + self, + name: str, + dataset: Dataset + ): + ... + + def register_udf(self, udf: ScalarUDF): ... + + def register_udaf(self, udaf: AggregateUDF): ... + + def register_udwf(self, udwf: WindowUDF): ... + + def catalog(self, name: str = "datafusion") -> Catalog: ... + + def tables(self) -> Set[str]: ... + + def table(self, name: str) -> DataFrame: ... + + def table_exist(self, name: str) -> bool: ... + + def empty_table(self) -> DataFrame: ... + + def session_id(self) -> str: ... + + def read_json( + self, + path: str | List[str], + schema: Optional[pa.Schema] = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: List[Tuple[str, str]] = [], + file_compression_type: Optional[str] = None, + **kwargs + ): + ... + + def read_csv( + self, + path: str | List[str], + schema: Optional[pa.Schema] = None, + head_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + table_partition_cols: List[Tuple[str, str]] = [], + file_compression_type: Optional[str] = None, + **kwargs + ): + ... + + def read_parquet( + self, + path: str | List[str], + table_partition_cols: List[Tuple[str, str]] = [], + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: Optional[pa.Schema] = None, + file_sort_order: Optional[List[List[SortExpr]]] = None, + **kwargs + ): + ... + + def read_avro( + self, + path: str, + schema: Optional[pa.Schema] = None, + table_partition_cols: List[Tuple[str, str]] = [], + file_extension: str = ".avro", + **kwargs + ): + ... + + def read_table(self, table: Table) -> DataFrame: ... + + def execute(self, plan: ExecutionPlan, part: int) -> RecordBatchStream: + ... + + +class DataFrame: + def __getitem__(self, key: str | List[str] | Tuple[str, ...]) -> DataFrame: ... + + def _repr_html_(self) -> str: ... + + def describe(self) -> DataFrame: ... + + def schema(self) -> pa.Schema: ... + + def select_columns(self, *args: str) -> DataFrame: ... + + def select(self, *args: Expr) -> DataFrame: ... + + def drop(self, *args: str) -> DataFrame: ... + + def filter(self, predicate: Expr) -> DataFrame: ... + + def with_column(self, name: str, expr: Expr) -> DataFrame: ... + + def with_columns(self, exprs: List[Expr]) -> DataFrame: ... + + def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: ... + + def aggregate(self, group_by: List[Expr], aggs: List[Expr]) -> DataFrame: ... + + def sort(self, *exprs: SortExpr) -> DataFrame: ... + + def limit(self, count: int, offset: int) -> DataFrame: ... + + def collect(self) -> List[pa.RecordBatch]: ... + + def cache(self) -> DataFrame: ... + + def collect_partitioned(self) -> List[List[pa.RecordBatch]]: ... + + def show(self, num: int = 20): ... + + def distinct(self) -> DataFrame: ... + + def join(self, right: DataFrame, how: str, left_on: List[str], right_on: List[str]) -> DataFrame: ... + + def join_on(self, right: DataFrame, on_exprs: List[Expr], how: str) -> DataFrame: ... + + def explain(self, verbose: bool = False, analyze: bool = False): ... + + def logical_plan(self) -> LogicalPlan: ... + + def optimized_logical_plan(self) -> LogicalPlan: ... + + def execution_plan(self) -> ExecutionPlan: ... + + def repartition(self, num: int) -> DataFrame: ... + + def repartition_by_hash(self, *args: Expr, num: int) -> DataFrame: ... + + def union(self, py_df: DataFrame, distinct: bool = False) -> DataFrame: ... + + def union_distinct(self, py_df: DataFrame) -> DataFrame: ... + + def unnest_column(self, column: str, preserve_nulls: bool = True) -> DataFrame: ... + + def unnest_columns(self, columns: List[str], preserve_nulls: bool = True) -> DataFrame: ... + + def intersect(self, py_df: DataFrame) -> DataFrame: ... + + def except_all(self, py_df: DataFrame) -> DataFrame: ... + + def write_csv(self, path: str, with_header: bool): ... + + def write_parquet(self, path: str, compression: str = "uncompressed", compression_level: Optional[int] = None): ... + + def write_json(self, path: str): ... + + def to_arrow_table(self) -> pa.Table: ... + + def __arrow_c_stream__( + self, requested_schema: object | None = None + ) -> object: ... + + def execute_stream(self) -> RecordBatchStream: ... + + def execute_stream_partitioned(self) -> List[RecordBatchStream]: ... + + def to_pandas(self) -> pd.DataFrame: ... + + def to_pylist(self) -> list: ... + + def to_pydict(self) -> dict: ... + + def to_polars(self) -> pl.DataFrame: ... + + def count(self) -> int: ... + + +class ScalarUDF: + def __init__( + self, + name: str, + func: Callable[..., pa.DataType], + input_types: List[pa.DataType], + return_type: pa.DataType, + volatility: str + ) -> None: ... + + def __call__(self, *args: Expr) -> Expr: ... + +class AggregateUDF: + def __init__( + self, + name: str, + accumulator: Callable[[], Accumulator], + input_types: List[pa.DataType], + return_type: pa.DataType, + state_type: List[pa.DataType], + volatility: str, + ) -> None: ... + + def __call__(self, *args: Expr) -> Expr: ... + +class WindowUDF: + def __init__( + self, + name: str, + evaluator: Callable[[], WindowEvaluator], + input_types: List[pa.DataType], + return_type: pa.DataType, + volatility: str, + ) -> None: ... + + def __call__(self, *args: Expr) -> Expr: ... + +class Config: + def __init__(self) -> None: ... + + @staticmethod + def from_env() -> Config: ... + + def get(self, key: str) -> Optional[str]: + ... + + def set(self, key: str, value: object): + ... + + def get_all(self) -> Dict[str, Optional[str]]: + ... + + +class LogicalPlan: + def to_variant(self) -> Any: + ... + + def inputs(self) -> List[LogicalPlan]: + ... + + def display(self) -> str: + ... + + def display_indent(self) -> str: + ... + + def display_indent_schema(self) -> str: + ... + + def display_graphviz(self) -> str: + ... + + def to_proto(self) -> bytes: + ... + + @staticmethod + def from_proto(ctx: SessionContext, proto_msg: bytes) -> LogicalPlan: + ... + +class ExecutionPlan: + def children(self) -> List[ExecutionPlan]: + ... + + def display(self) -> str: + ... + + def display_indent(self) -> str: + ... + + def to_proto(self) -> bytes: + ... + + @staticmethod + def from_proto(ctx: SessionContext, proto_msg: bytes) -> ExecutionPlan: + ... + + @property + def partition_count(self) -> int: ... + + +class RecordBatch: + def to_pyarrow(self) -> pa.RecordBatch: ... + + +class RecordBatchStream: + def next(self) -> RecordBatch: + ... + + def __next__(self) -> RecordBatch: + ... + + async def __anext__(self) -> RecordBatch: + ... + + def __iter__(self) -> RecordBatch: + ... + + async def __aiter__(self) -> RecordBatch: + ... + diff --git a/python/datafusion/_internal/common.pyi b/python/datafusion/_internal/common.pyi new file mode 100644 index 000000000..902d6753b --- /dev/null +++ b/python/datafusion/_internal/common.pyi @@ -0,0 +1,245 @@ +import enum +from typing import List, Optional, Tuple + +class DFSchema: + @staticmethod + def empty() -> DFSchema: + ... + + def field_names(self) -> List[str]: + ... + +class DataType: + ... + + +class RexType(enum.IntEnum): + Alias = 0 + Literal = 1 + Call = 2 + Reference = 3 + ScalarSubquery = 4 + Other = 5 + + +class PythonType(enum.IntEnum): + Array = 0 + Bool = 1 + Bytes = 2 + Datetime = 3 + Float = 4 + Int = 5 + List = 6 + None_ = 7 + Object = 8 + Str = 9 + + +class SqlType(enum.IntEnum): + ANY = 0 + ARRAY = 1 + BIGINT = 2 + BINARY = 3 + BOOLEAN = 4 + CHAR = 5 + COLUMN_LIST = 6 + CURSOR = 7 + DATE = 8 + DECIMAL = 9 + DISTINCT = 10 + DOUBLE = 11 + DYNAMIC_STAR = 12 + FLOAT = 13 + GEOMETRY = 14 + INTEGER = 15 + INTERVAL = 16 + INTERVAL_DAY = 17 + INTERVAL_DAY_HOUR = 18 + INTERVAL_DAY_MINUTE = 19 + INTERVAL_DAY_SECOND = 20 + INTERVAL_HOUR = 21 + INTERVAL_HOUR_MINUTE = 22 + INTERVAL_HOUR_SECOND = 23 + INTERVAL_MINUTE = 24 + INTERVAL_MINUTE_SECOND = 25 + INTERVAL_MONTH = 26 + INTERVAL_SECOND = 27 + INTERVAL_YEAR = 28 + INTERVAL_YEAR_MONTH = 29 + MAP = 30 + MULTISET = 31 + NULL = 32 + OTHER = 33 + REAL = 34 + ROW = 35 + SARG = 36 + SMALLINT = 37 + STRUCTURED = 38 + SYMBOL = 39 + TIME = 40 + TIME_WITH_LOCAL_TIME_ZONE = 41 + TIMESTAMP = 42 + TIMESTAMP_WITH_LOCAL_TIME_ZONE = 43 + TINYINT = 44 + UNKNOWN = 45 + VARBINARY = 46 + VARCHAR = 47 + + +class DataTypeMap: + def __init__(self, arrow_type: DataType, python_type: PythonType, sql_type: SqlType) -> None: ... + + @staticmethod + def from_parquet_type_str(parquet_str_type: str) -> DataTypeMap: ... + + @staticmethod + def arrow(arrow_type: DataType) -> DataTypeMap: ... + + @staticmethod + def arrow_str(arrow_type_str: str) -> DataTypeMap: ... + + @staticmethod + def sql(sql_type: SqlType) -> DataTypeMap: ... + + def friendly_arrow_type_name(self) -> str: ... + + @property + def arrow_type(self) -> DataType: ... + + @arrow_type.setter + def arrow_type(self, arrow_type: DataType): ... + + @property + def python_type(self) -> PythonType: ... + + @python_type.setter + def python_type(self, python_type: PythonType): ... + + @property + def sql_type(self) -> SqlType: ... + + @sql_type.setter + def sql_type(self, sql_type: SqlType): ... + + +class NullTreatment(enum.IntEnum): + IGNORE_NULLS = 0 + RESPECT_NULLS = 1 + + +class SqlSchema: + + def __init__(self, schema_name: str) -> None: ... + + def table_by_name(self, table_name: str) -> Optional[SqlTable]: ... + + def add_table(self, table: SqlTable): ... + + def drop_table(self, table_name: str): ... + + @property + def name(self) -> str: ... + + @name.setter + def name(self, name: str): ... + + @property + def tables(self) -> List[SqlTable]: ... + + @tables.setter + def tables(self, tables: List[SqlTable]): ... + + @property + def views(self) -> List[SqlView]: ... + + @views.setter + def views(self, views: List[SqlView]): ... + + @property + def functions(self) -> List[SqlFunction]: ... + + @functions.setter + def functions(self, functions: List[SqlFunction]): ... + + +class SqlTable: + def __init__( + self, + table_name: str, + columns: List[Tuple[str, DataTypeMap]], + row_count: int, + filepaths: Optional[List[str]] = None + ) -> None: ... + + @property + def name(self) -> str: ... + + @name.setter + def name(self, name: str): ... + + @property + def columns(self) -> List[Tuple[str, DataTypeMap]]: ... + + @columns.setter + def columns(self, columns: List[Tuple[str, DataTypeMap]]): ... + + @property + def primary_key(self) -> Optional[str]: ... + + @primary_key.setter + def primary_key(self, primary_key: Optional[str]): ... + + @property + def foreign_keys(self) -> List[str]: ... + + @foreign_keys.setter + def foreign_keys(self, foreign_keys: List[str]): ... + + @property + def indexes(self) -> List[str]: ... + + @indexes.setter + def indexes(self, indexes: List[str]): ... + + @property + def constraints(self) -> List[str]: ... + + @constraints.setter + def constraints(self, constraints: List[str]): ... + + @property + def statistics(self) -> SqlStatistics: ... + + @statistics.setter + def statistics(self, statistics: SqlStatistics): ... + + @property + def filepaths(self) -> Optional[List[str]]: ... + + @filepaths.setter + def filepaths(self, filepaths: Optional[List[str]]): ... + + +class SqlView: + + @property + def name(self) -> str: ... + + @name.setter + def name(self, name: str): ... + + @property + def definition(self) -> str: ... + + @definition.setter + def definition(self, definition: str): ... + + +class SqlStatistics: + def __init__(self, row_count: float) -> None: ... + + def getRowCount(self) -> float: ... + + +class SqlFunction: + ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/__init__.pyi b/python/datafusion/_internal/expr/__init__.pyi new file mode 100644 index 000000000..72a9297eb --- /dev/null +++ b/python/datafusion/_internal/expr/__init__.pyi @@ -0,0 +1,43 @@ +from .base import Expr as Expr, ExprFuncBuilder as ExprFuncBuilder +from .column import Column as Column +from .literal import Literal as Literal +from .binary_expr import BinaryExpr as BinaryExpr +from .literal import Literal as Literal +from .aggregate_expr import AggregateFunction as AggregateFunction +from .bool_expr import Not as Not, IsNotNull as IsNotNull, IsNull as IsNull, IsTrue as IsTrue, IsFalse as IsFalse, IsUnknown as IsUnknown, IsNotTrue as IsNotTrue, IsNotFalse as IsNotFalse, IsNotUnknown as IsNotUnknown, Negative as Negative +from .like import Like as Like, ILike as ILike, SimilarTo as SimilarTo +from .scalar_variable import ScalarVariable as ScalarVariable +from .alias import Alias as Alias +from .in_list import InList as InList +from .exists import Exists as Exists +from .subquery import Subquery as Subquery +from .in_subquery import InSubquery as InSubquery +from .scalar_subquery import ScalarSubquery as ScalarSubquery +from .placeholder import Placeholder as Placeholder +from .grouping_set import GroupingSet as GroupingSet +from .case import Case as Case +from .conditional_expr import CaseBuilder as CaseBuilder +from .cast import Cast as Cast, TryCast as TryCast +from .between import Between as Between +from .explain import Explain as Explain +from .limit import Limit as Limit +from .aggregate import Aggregate as Aggregate +from .sort import Sort as Sort +from .analyze import Analyze as Analyze +from .empty_relation import EmptyRelation as EmptyRelation +from .join import Join as Join, JoinType as JoinType, JoinConstraint as JoinConstraint +from .union import Union as Union +from .unnest import Unnest as Unnest +from .unnest_expr import UnnestExpr as UnnestExpr +from .extension import Extension as Extension +from .filter import Filter as Filter +from .projection import Projection as Projection +from .table_scan import TableScan as TableScan +from .create_memory_table import CreateMemoryTable as CreateMemoryTable +from .create_view import CreateView as CreateView +from .distinct import Distinct as Distinct +from .sort_expr import SortExpr as SortExpr +from .subquery_alias import SubqueryAlias as SubqueryAlias +from .drop_table import DropTable as DropTable +from .repartition import Partitioning as Partitioning, Repartition as Repartition +from .window import WindowExpr as WindowExpr, WindowFrame as WindowFrame, WindowFrameBound as WindowFrameBound \ No newline at end of file diff --git a/python/datafusion/_internal/expr/aggregate.pyi b/python/datafusion/_internal/expr/aggregate.pyi new file mode 100644 index 000000000..faec01280 --- /dev/null +++ b/python/datafusion/_internal/expr/aggregate.pyi @@ -0,0 +1,20 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema +from .base import Expr + + +class Aggregate: + def group_by_exprs(self) -> List[Expr]: ... + + def aggregate_exprs(self) -> List[Expr]: ... + + def agg_expressions(self) -> List[Expr]: ... + + def agg_func_name(self, expr: Expr) -> str: ... + + def aggregation_arguments(self, expr: Expr) -> List[Expr]: ... + + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/aggregate_expr.pyi b/python/datafusion/_internal/expr/aggregate_expr.pyi new file mode 100644 index 000000000..e543dcae5 --- /dev/null +++ b/python/datafusion/_internal/expr/aggregate_expr.pyi @@ -0,0 +1,10 @@ +from typing import List +from .base import Expr + + +class AggregateFunction: + def aggregate_type(self) -> str: ... + + def is_distinct(self) -> bool: ... + + def args(self) -> List[Expr]: ... diff --git a/python/datafusion/_internal/expr/alias.pyi b/python/datafusion/_internal/expr/alias.pyi new file mode 100644 index 000000000..6106b6067 --- /dev/null +++ b/python/datafusion/_internal/expr/alias.pyi @@ -0,0 +1,6 @@ +from .base import Expr + +class Alias: + def alias(self) -> str: ... + + def expr(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/analyze.pyi b/python/datafusion/_internal/expr/analyze.pyi new file mode 100644 index 000000000..f253b042a --- /dev/null +++ b/python/datafusion/_internal/expr/analyze.pyi @@ -0,0 +1,11 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema + + +class Analyze: + def verbose(self) -> bool: ... + + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/base.pyi b/python/datafusion/_internal/expr/base.pyi new file mode 100644 index 000000000..845cd1c8b --- /dev/null +++ b/python/datafusion/_internal/expr/base.pyi @@ -0,0 +1,108 @@ +from typing import Any, List, Optional +import pyarrow as pa + +from ..common import RexType, DataTypeMap, NullTreatment +from .. import LogicalPlan +from .window import WindowFrame +from .sort_expr import SortExpr + + +class Expr: + def to_variant(self) -> Any: + ... + + def schema_name(self) -> str: + ... + + def canonical_name(self) -> str: + ... + + def variant_name(self) -> str: + ... + + def __richcmp__(self, other: Expr, op: int) -> Expr: ... + + def __add__(self, rhs: Expr) -> Expr: ... + + def __sub__(self, rhs: Expr) -> Expr: ... + + def __truediv__(self, rhs: Expr) -> Expr: ... + + def __mul__(self, rhs: Expr) -> Expr: ... + + def __mod__(self, rhs: Expr) -> Expr: ... + + def __and__(self, rhs: Expr) -> Expr: ... + + def __or__(self, rhs: Expr) -> Expr: ... + + def __invert__(self) -> Expr: ... + + def __getitem__(self, key: str) -> Expr: ... + + @staticmethod + def literal(value: Any) -> Expr: ... + + @staticmethod + def column(value: str) -> Expr: ... + + def alias(self, name: str) -> Expr: ... + + def sort(self, ascending: bool = True, nulls_first: bool = True) -> Expr: ... + + def is_null(self) -> Expr: ... + + def is_not_null(self) -> Expr: ... + + def cast(self, to: pa.DataType) -> Expr: ... + + def between(self, low: Expr, high: Expr, negated: bool = False) -> Expr: ... + + def rex_type(self) -> RexType: ... + + def types(self) -> DataTypeMap: ... + + def python_value(self) -> Any: ... + + def rex_call_operands(self) -> List[Expr]: ... + + def rex_call_operator(self) -> str: ... + + def column_name(self, plan: LogicalPlan) -> str: ... + + def order_by(self, order_by: List[SortExpr]) -> ExprFuncBuilder: ... + + def filter(self, filter: Expr) -> ExprFuncBuilder: ... + + def distinct(self) -> ExprFuncBuilder: ... + + def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: ... + + def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... + + def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... + + def over( + self, + partition_by: Optional[List[Expr]] = None, + window_frame: Optional[WindowFrame] = None, + order_by: Optional[List[SortExpr]] = None, + null_treatment: Optional[NullTreatment] = None) -> Expr: + ... + +class ExprFuncBuilder: + def order_by(self, order_by: List[SortExpr]) -> ExprFuncBuilder: ... + + def filter(self, filter: Expr) -> ExprFuncBuilder: ... + + def distinct(self) -> ExprFuncBuilder: ... + + def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: ... + + def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... + + def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... + + def build(self) -> Expr: ... + + diff --git a/python/datafusion/_internal/expr/between.pyi b/python/datafusion/_internal/expr/between.pyi new file mode 100644 index 000000000..265010ad5 --- /dev/null +++ b/python/datafusion/_internal/expr/between.pyi @@ -0,0 +1,11 @@ +from .base import Expr + + +class Between: + def expr(self) -> Expr: ... + + def negated(self) -> bool: ... + + def low(self) -> Expr: ... + + def high(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/binary_expr.pyi b/python/datafusion/_internal/expr/binary_expr.pyi new file mode 100644 index 000000000..f179ce3ec --- /dev/null +++ b/python/datafusion/_internal/expr/binary_expr.pyi @@ -0,0 +1,10 @@ +from .base import Expr + + +class BinaryExpr: + def left(self) -> Expr: ... + + def right(self) -> Expr: ... + + def on(self) -> str: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/bool_expr.pyi b/python/datafusion/_internal/expr/bool_expr.pyi new file mode 100644 index 000000000..b5e35b2e2 --- /dev/null +++ b/python/datafusion/_internal/expr/bool_expr.pyi @@ -0,0 +1,32 @@ +from .base import Expr + + +class Not: + def expr(self) -> Expr: ... + +class IsNotNull: + def expr(self) -> Expr: ... + +class IsNull: + def expr(self) -> Expr: ... + +class IsTrue: + def expr(self) -> Expr: ... + +class IsFalse: + def expr(self) -> Expr: ... + +class IsUnknown: + def expr(self) -> Expr: ... + +class IsNotTrue: + def expr(self) -> Expr: ... + +class IsNotFalse: + def expr(self) -> Expr: ... + +class IsNotUnknown: + def expr(self) -> Expr: ... + +class Negative: + def expr(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/case.pyi b/python/datafusion/_internal/expr/case.pyi new file mode 100644 index 000000000..f1490ce51 --- /dev/null +++ b/python/datafusion/_internal/expr/case.pyi @@ -0,0 +1,10 @@ +from typing import List, Optional, Tuple +from .base import Expr + + +class Case: + def expr(self) -> Optional[Expr]: ... + + def when_then_expr(self) -> List[Tuple[Expr, Expr]]: ... + + def else_expr(self) -> Optional[Expr]: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/cast.pyi b/python/datafusion/_internal/expr/cast.pyi new file mode 100644 index 000000000..bf18e64f1 --- /dev/null +++ b/python/datafusion/_internal/expr/cast.pyi @@ -0,0 +1,14 @@ +from .base import Expr +from ..common import DataType + + +class Cast: + def expr(self) -> Expr: ... + + def data_type(self) -> DataType: ... + + +class TryCast: + def expr(self) -> Expr: ... + + def data_type(self) -> DataType: ... diff --git a/python/datafusion/_internal/expr/column.pyi b/python/datafusion/_internal/expr/column.pyi new file mode 100644 index 000000000..7e5ff7876 --- /dev/null +++ b/python/datafusion/_internal/expr/column.pyi @@ -0,0 +1,9 @@ +from typing import Optional + +class Column: + def name(self) -> str: ... + + def relation(self) -> Optional[str]: ... + + def qualified_name(self) -> str: ... + diff --git a/python/datafusion/_internal/expr/conditional_expr.pyi b/python/datafusion/_internal/expr/conditional_expr.pyi new file mode 100644 index 000000000..01639fa57 --- /dev/null +++ b/python/datafusion/_internal/expr/conditional_expr.pyi @@ -0,0 +1,10 @@ +from .base import Expr + + +class CaseBuilder: + def when(self, when: Expr, then: Expr) -> CaseBuilder: ... + + def otherwise(self, else_expr: Expr) -> Expr: ... + + def end(self) -> Expr: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/create_memory_table.pyi b/python/datafusion/_internal/expr/create_memory_table.pyi new file mode 100644 index 000000000..fadb1cc58 --- /dev/null +++ b/python/datafusion/_internal/expr/create_memory_table.pyi @@ -0,0 +1,10 @@ +from typing import List +from .. import LogicalPlan + + +class CreateMemoryTable: + def name(self) -> str: ... + def input(self) -> List[LogicalPlan]: ... + def if_not_exists(self) -> bool: ... + def or_replace(self) -> bool: ... + def __name__(self) -> str: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/create_view.pyi b/python/datafusion/_internal/expr/create_view.pyi new file mode 100644 index 000000000..31ab77a81 --- /dev/null +++ b/python/datafusion/_internal/expr/create_view.pyi @@ -0,0 +1,11 @@ +from typing import List, Optional +from .. import LogicalPlan + + +class CreateView: + def name(self) -> str: ... + def input(self) -> List[LogicalPlan]: ... + def or_replace(self) -> bool: ... + def definition(self) -> Optional[str]: ... + def __name__(self) -> str: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/distinct.pyi b/python/datafusion/_internal/expr/distinct.pyi new file mode 100644 index 000000000..9a234a3bf --- /dev/null +++ b/python/datafusion/_internal/expr/distinct.pyi @@ -0,0 +1,9 @@ +from typing import List +from .. import LogicalPlan + + +class Distinct: + def input(self) -> List[LogicalPlan]: ... + + def __name__(self) -> str: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/drop_table.pyi b/python/datafusion/_internal/expr/drop_table.pyi new file mode 100644 index 000000000..8fe647b4e --- /dev/null +++ b/python/datafusion/_internal/expr/drop_table.pyi @@ -0,0 +1,12 @@ +from typing import List +from .. import LogicalPlan + + +class DropTable: + def name(self) -> str: ... + + def input(self) -> List[LogicalPlan]: ... + + def if_exists(self) -> bool: ... + + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/empty_relation.pyi b/python/datafusion/_internal/expr/empty_relation.pyi new file mode 100644 index 000000000..048e5a23e --- /dev/null +++ b/python/datafusion/_internal/expr/empty_relation.pyi @@ -0,0 +1,10 @@ +from ..common import DFSchema + + +class EmptyRelation: + def produce_one_row(self) -> bool: ... + + def schema(self) -> DFSchema: ... + + def __name__(self) -> str: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/exists.pyi b/python/datafusion/_internal/expr/exists.pyi new file mode 100644 index 000000000..0980b70a9 --- /dev/null +++ b/python/datafusion/_internal/expr/exists.pyi @@ -0,0 +1,7 @@ +from .subquery import Subquery + + +class Exists: + def subquery(self) -> Subquery: ... + + def negated(self) -> bool: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/explain.pyi b/python/datafusion/_internal/expr/explain.pyi new file mode 100644 index 000000000..6feae6401 --- /dev/null +++ b/python/datafusion/_internal/expr/explain.pyi @@ -0,0 +1,16 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema + + +class Explain: + def explain_string(self) -> List[str]: ... + + def verbose(self) -> bool: ... + + def plan(self) -> LogicalPlan: ... + + def schema(self) -> DFSchema: ... + + def logical_optimization_succceeded(self) -> bool: ... + diff --git a/python/datafusion/_internal/expr/extension.pyi b/python/datafusion/_internal/expr/extension.pyi new file mode 100644 index 000000000..3da0c6d1e --- /dev/null +++ b/python/datafusion/_internal/expr/extension.pyi @@ -0,0 +1,2 @@ +class Extension: + def name(self) -> str: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/filter.pyi b/python/datafusion/_internal/expr/filter.pyi new file mode 100644 index 000000000..a5ba1332b --- /dev/null +++ b/python/datafusion/_internal/expr/filter.pyi @@ -0,0 +1,12 @@ +from typing import List +from .base import Expr +from .. import LogicalPlan +from ..common import DFSchema + + +class Filter: + def predicate(self) -> Expr: ... + + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/grouping_set.pyi b/python/datafusion/_internal/expr/grouping_set.pyi new file mode 100644 index 000000000..061c34866 --- /dev/null +++ b/python/datafusion/_internal/expr/grouping_set.pyi @@ -0,0 +1,2 @@ +class GroupingSet: + ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/in_list.pyi b/python/datafusion/_internal/expr/in_list.pyi new file mode 100644 index 000000000..98c8c362c --- /dev/null +++ b/python/datafusion/_internal/expr/in_list.pyi @@ -0,0 +1,9 @@ +from typing import List +from .base import Expr + +class InList: + def expr(self) -> Expr: ... + + def list(self) -> List[Expr]: ... + + def negated(self) -> bool: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/in_subquery.pyi b/python/datafusion/_internal/expr/in_subquery.pyi new file mode 100644 index 000000000..dda995df2 --- /dev/null +++ b/python/datafusion/_internal/expr/in_subquery.pyi @@ -0,0 +1,11 @@ +from .base import Expr +from .subquery import Subquery + + +class InSubquery: + def expr(self) -> Expr: ... + + def subquery(self) -> Subquery: ... + + def negated(self) -> bool: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/join.pyi b/python/datafusion/_internal/expr/join.pyi new file mode 100644 index 000000000..8819dd27b --- /dev/null +++ b/python/datafusion/_internal/expr/join.pyi @@ -0,0 +1,32 @@ +from typing import List, Optional, Tuple +from .. import LogicalPlan +from .base import Expr +from ..common import DFSchema + + +class JoinType: + def is_outer(self) -> bool: ... + +class JoinConstraint: + ... + + +class Join: + def left(self) -> LogicalPlan: ... + + def right(self) -> LogicalPlan: ... + + def on(self) -> List[Tuple[Expr, Expr]]: ... + + def filter(self) -> Optional[Expr]: ... + + def join_type(self) -> JoinType: ... + + def join_constraint(self) -> JoinConstraint: ... + + def schema(self) -> DFSchema: ... + + def null_equals_null(self) -> bool: ... + + def __name__(self) -> str: ... + diff --git a/python/datafusion/_internal/expr/like.pyi b/python/datafusion/_internal/expr/like.pyi new file mode 100644 index 000000000..6daf940d5 --- /dev/null +++ b/python/datafusion/_internal/expr/like.pyi @@ -0,0 +1,31 @@ +from typing import Optional +from .base import Expr + + +class Like: + def negated(self) -> bool: ... + + def expr(self) -> Expr: ... + + def pattern(self) -> Expr: ... + + def escape_char(self) -> Optional[str]: ... + + +class ILike: + def negated(self) -> bool: ... + + def expr(self) -> Expr: ... + + def pattern(self) -> Expr: ... + + def escape_char(self) -> Optional[str]: ... + +class SimilarTo: + def negated(self) -> bool: ... + + def expr(self) -> Expr: ... + + def pattern(self) -> Expr: ... + + def escape_char(self) -> Optional[str]: ... diff --git a/python/datafusion/_internal/expr/limit.pyi b/python/datafusion/_internal/expr/limit.pyi new file mode 100644 index 000000000..bb3c5d691 --- /dev/null +++ b/python/datafusion/_internal/expr/limit.pyi @@ -0,0 +1,9 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema + + +class Limit: + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/literal.pyi b/python/datafusion/_internal/expr/literal.pyi new file mode 100644 index 000000000..2600e2f5b --- /dev/null +++ b/python/datafusion/_internal/expr/literal.pyi @@ -0,0 +1,44 @@ +from typing import Optional, Tuple + + +class Literal: + def data_type(self) -> str: ... + + def value_f32(self) -> Optional[float]: ... + + def value_f64(self) -> Optional[float]: ... + + def value_decimal128(self) -> Tuple[Optional[int], int, int]: ... + + def value_i8(self) -> Optional[int]: ... + + def value_i16(self) -> Optional[int]: ... + + def value_i32(self) -> Optional[int]: ... + + def value_i64(self) -> Optional[int]: ... + + def value_u8(self) -> Optional[int]: ... + + def value_u16(self) -> Optional[int]: ... + + def value_u32(self) -> Optional[int]: ... + + def value_u64(self) -> Optional[int]: ... + + def value_date32(self) -> Optional[int]: ... + + def value_date64(self) -> Optional[int]: ... + + def value_time64(self) -> Optional[int]: ... + + def value_timestamp(self) -> Tuple[Optional[int], Optional[int]]: ... + + def value_bool(self) -> Optional[bool]: ... + + def value_string(self) -> Optional[str]: ... + + def value_interval_day_time(self) -> Optional[Tuple[int, int]]: ... + + def into_type(self) -> Literal: ... + diff --git a/python/datafusion/_internal/expr/placeholder.pyi b/python/datafusion/_internal/expr/placeholder.pyi new file mode 100644 index 000000000..811c9e2fb --- /dev/null +++ b/python/datafusion/_internal/expr/placeholder.pyi @@ -0,0 +1,6 @@ +from ..common import DataType + + +class Placeholder: + def id(self) -> str: ... + def data_type(self) -> DataType: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/projection.pyi b/python/datafusion/_internal/expr/projection.pyi new file mode 100644 index 000000000..42d4621e3 --- /dev/null +++ b/python/datafusion/_internal/expr/projection.pyi @@ -0,0 +1,15 @@ +from typing import List +from .base import Expr +from .. import LogicalPlan +from ..common import DFSchema + + +class Projection: + def projections(self) -> List[Expr]: ... + + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... + + def __name__(self) -> str: ... + \ No newline at end of file diff --git a/python/datafusion/_internal/expr/repartition.pyi b/python/datafusion/_internal/expr/repartition.pyi new file mode 100644 index 000000000..00b6f4033 --- /dev/null +++ b/python/datafusion/_internal/expr/repartition.pyi @@ -0,0 +1,14 @@ +from typing import List +from .. import LogicalPlan +from .base import Expr + + +class Repartition: + def input(self) -> List[LogicalPlan]: ... + def partitioning_scheme(self) -> Partitioning: ... + def distribute_list(self) -> List[Expr]: ... + def distribute_columns(self) -> str: ... + def __name__(self) -> str: ... + +class Partitioning: + ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/scalar_subquery.pyi b/python/datafusion/_internal/expr/scalar_subquery.pyi new file mode 100644 index 000000000..3146859b0 --- /dev/null +++ b/python/datafusion/_internal/expr/scalar_subquery.pyi @@ -0,0 +1,5 @@ +from .subquery import Subquery + + +class ScalarSubquery: + def subquery(self) -> Subquery: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/scalar_variable.pyi b/python/datafusion/_internal/expr/scalar_variable.pyi new file mode 100644 index 000000000..ae0022953 --- /dev/null +++ b/python/datafusion/_internal/expr/scalar_variable.pyi @@ -0,0 +1,10 @@ +from typing import List +from ..common import DataType + + +class ScalarVariable: + def data_type(self) -> DataType: + ... + + def variables(self) -> List[str]: + ... diff --git a/python/datafusion/_internal/expr/sort.pyi b/python/datafusion/_internal/expr/sort.pyi new file mode 100644 index 000000000..12f899c86 --- /dev/null +++ b/python/datafusion/_internal/expr/sort.pyi @@ -0,0 +1,15 @@ +from typing import List, Optional + +from .. import LogicalPlan +from ..common import DFSchema +from .sort_expr import SortExpr + + +class Sort: + def sort_exprs(self) -> List[SortExpr]: ... + + def get_fetch_val(self) -> Optional[int]: ... + + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/sort_expr.pyi b/python/datafusion/_internal/expr/sort_expr.pyi new file mode 100644 index 000000000..c8d93bc4f --- /dev/null +++ b/python/datafusion/_internal/expr/sort_expr.pyi @@ -0,0 +1,16 @@ +from .base import Expr + + +class SortExpr: + def __init__( + self, + expr: Expr, + asc: bool, + nulls_first: bool, + ) -> None: ... + + def expr(self) -> Expr: ... + + def ascending(self) -> bool: ... + + def nulls_first(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/subquery.pyi b/python/datafusion/_internal/expr/subquery.pyi new file mode 100644 index 000000000..74768a2c1 --- /dev/null +++ b/python/datafusion/_internal/expr/subquery.pyi @@ -0,0 +1,9 @@ +from typing import List +from .. import LogicalPlan + +class Subquery: + def input(self) -> List[LogicalPlan]: + ... + + def __name__(self) -> str: + ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/subquery_alias.pyi b/python/datafusion/_internal/expr/subquery_alias.pyi new file mode 100644 index 000000000..164c05aab --- /dev/null +++ b/python/datafusion/_internal/expr/subquery_alias.pyi @@ -0,0 +1,13 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema + + +class SubqueryAlias: + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... + + def alias(self) -> str: ... + + def __name__(self) -> str: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/table_scan.pyi b/python/datafusion/_internal/expr/table_scan.pyi new file mode 100644 index 000000000..36d80dc1b --- /dev/null +++ b/python/datafusion/_internal/expr/table_scan.pyi @@ -0,0 +1,18 @@ +from typing import List, Optional, Tuple +from ..common import DFSchema +from .base import Expr + + +class TableScan: + def table_name(self) -> str: ... + + def fqn(self) -> Tuple[Optional[str], Optional[str], str]: ... + + def projection(self) -> List[Tuple[int, str]]: ... + + def schema(self) -> DFSchema: ... + + def filters(self) -> List[Expr]: ... + + def fetch(self) -> Optional[int]: ... + diff --git a/python/datafusion/_internal/expr/union.pyi b/python/datafusion/_internal/expr/union.pyi new file mode 100644 index 000000000..051f90edd --- /dev/null +++ b/python/datafusion/_internal/expr/union.pyi @@ -0,0 +1,11 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema + + +class Union: + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... + + def __name__(self) -> str: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/unnest.pyi b/python/datafusion/_internal/expr/unnest.pyi new file mode 100644 index 000000000..13252e213 --- /dev/null +++ b/python/datafusion/_internal/expr/unnest.pyi @@ -0,0 +1,11 @@ +from typing import List +from .. import LogicalPlan +from ..common import DFSchema + + +class Unnest: + def input(self) -> List[LogicalPlan]: ... + + def schema(self) -> DFSchema: ... + + def __name__(self) -> str: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/unnest_expr.pyi b/python/datafusion/_internal/expr/unnest_expr.pyi new file mode 100644 index 000000000..50e08808b --- /dev/null +++ b/python/datafusion/_internal/expr/unnest_expr.pyi @@ -0,0 +1,7 @@ +from .base import Expr + + +class UnnestExpr: + def expr(self) -> Expr: ... + + def __name__(self) -> str: ... \ No newline at end of file diff --git a/python/datafusion/_internal/expr/window.pyi b/python/datafusion/_internal/expr/window.pyi new file mode 100644 index 000000000..03cac176c --- /dev/null +++ b/python/datafusion/_internal/expr/window.pyi @@ -0,0 +1,48 @@ +from typing import Any, List, Optional +from ..common import DFSchema +from .base import Expr +from .sort_expr import SortExpr + + +class WindowExpr: + def schema(self) -> DFSchema: ... + + def get_window_expr(self) -> List[Expr]: ... + + def get_sort_exprs(self, expr: Expr) -> List[SortExpr]: ... + + def get_partition_exprs(self, expr: Expr) -> List[Expr]: ... + + def get_args(self, expr: Expr) -> List[Expr]: ... + + def window_func_name(self, expr: Expr) -> str: ... + + def get_frame(self, expr: Expr) -> Optional[WindowFrame]: ... + + +class WindowFrame: + def __init__( + self, + unit: str, + start_bound: Optional[Any], + end_bound: Optional[Any], + ) -> None: ... + + def get_frame_units(self) -> str: ... + + def get_lower_bound(self) -> WindowFrameBound: ... + + def get_upper_bound(self) -> WindowFrameBound: ... + + +class WindowFrameBound: + def is_current_row(self) -> bool: ... + + def is_preceding(self) -> bool: ... + + def is_following(self) -> bool: ... + + def get_offset(self) -> Optional[int]: ... + + def is_unbounded(self) -> bool: ... + diff --git a/python/datafusion/_internal/functions/__init__.pyi b/python/datafusion/_internal/functions/__init__.pyi new file mode 100644 index 000000000..97b523db1 --- /dev/null +++ b/python/datafusion/_internal/functions/__init__.pyi @@ -0,0 +1,376 @@ +from typing import Any, List, Optional, Protocol +from ..expr import Expr, SortExpr, CaseBuilder, WindowFrame +from .. import SessionContext +from ...common import NullTreatment + +def in_list(expr: Expr, value: List[Expr], negated: bool) -> Expr: ... + +def make_array(exprs: List[Expr]) -> Expr: ... + +def array_concat(exprs: List[Expr]) -> Expr: ... + +def array_cat(exprs: List[Expr]) -> Expr: ... + +def array_position(array: Expr, element: Expr, index: Optional[int] = None) -> Expr: ... + +def array_slice(array: Expr, begin: Expr, end: Expr, stride: Optional[Expr] = None) -> Expr: ... + +def digest(value: Expr, method: Expr) -> Expr: ... + +def concat(args: List[Expr]) -> Expr: ... + +def concat_ws(sep: str, args: List[Expr]) -> Expr: ... + +def regexp_like(values: Expr, regex: Expr, flags: Optional[Expr] = None) -> Expr: ... + +def regexp_match(values: Expr, regex: Expr, flags: Optional[Expr] = None) -> Expr: ... + +def regexp_replace(string: Expr, pattern: Expr, replacement: Expr, flags: Optional[Expr] = None) -> Expr: ... + +def order_by(expr: Expr, asc: bool, nulls_first: bool) -> SortExpr: ... + +def alias(expr: Expr, name: str) -> Expr: ... + +def col(name: str) -> Expr: ... + +def case(expr: Expr) -> CaseBuilder: ... + +def when(when: Expr, then: Expr) -> CaseBuilder: ... + +def window(name: str, args: List[Expr], partition_by: Optional[List[Expr]], order_by: Optional[List[SortExpr]], window_frame: Optional[WindowFrame], ctx: Optional[SessionContext]) -> Expr: ... + +def abs(num: Expr) -> Expr: ... + +def acos(num: Expr) -> Expr: ... + +def acosh(num: Expr) -> Expr: ... + +def ascii(num: Expr) -> Expr: ... + +def asin(num: Expr) -> Expr: ... + +def asinh(num: Expr) -> Expr: ... + +def atan(num: Expr) -> Expr: ... + +def atanh(num: Expr) -> Expr: ... + +def atan2(y: Expr, x: Expr) -> Expr: ... + +def bit_length(arg: Expr) -> Expr: ... + +def btrim(*args: Expr) -> Expr: ... + +def cbrt(num: Expr) -> Expr: ... + +def ceil(num: Expr) -> Expr: ... + +def character_length(string: Expr) -> Expr: ... + +def length(string: Expr) -> Expr: ... + +def char_length(string: Expr) -> Expr: ... + +def chr(arg: Expr) -> Expr: ... + +def coalesce(*args: Expr) -> Expr: ... + +def cos(num: Expr) -> Expr: ... + +def cosh(num: Expr) -> Expr: ... + +def cot(num: Expr) -> Expr: ... + +def degrees(num: Expr) -> Expr: ... + +def decode(input: Expr, encoding: Expr) -> Expr: ... + +def encode(input: Expr, encoding: Expr) -> Expr: ... + +def ends_with(string: Expr, suffix: Expr) -> Expr: ... + +def exp(num: Expr) -> Expr: ... + +def factorial(num: Expr) -> Expr: ... + +def floor(num: Expr) -> Expr: ... + +def gcd(x: Expr, y: Expr) -> Expr: ... + +def initcap(string: Expr) -> Expr: ... + +def isnan(num: Expr) -> Expr: ... + +def iszero(num: Expr) -> Expr: ... + +def levenshtein(string1: Expr, string2: Expr) -> Expr: ... + +def lcm(x: Expr, y: Expr) -> Expr: ... + +def left(string: Expr, n: Expr) -> Expr: ... + +def ln(num: Expr) -> Expr: ... + +def log(base: Expr, num: Expr) -> Expr: ... + +def log10(num: Expr) -> Expr: ... + +def log2(num: Expr) -> Expr: ... + +def lower(arg1: Expr) -> Expr: ... + +def lpad(*args: Expr) -> Expr: ... + +def ltrim(*args: Expr) -> Expr: ... + +def md5(input_arg: Expr) -> Expr: ... + +def nanvl(x: Expr, y: Expr) -> Expr: ... + +def nvl(x: Expr, y: Expr) -> Expr: ... + +def nullif(arg_1: Expr, arg_2: Expr) -> Expr: ... + +def octet_length(args: Expr) -> Expr: ... + +def overlay(*args: Expr) -> Expr: ... + +def pi() -> Expr: ... + +def power(base: Expr, exponent: Expr) -> Expr: ... + +def radians(num: Expr) -> Expr: ... + +def repeat(string: Expr, n: Expr) -> Expr: ... + +def replace(string: Expr, from_: Expr, to: Expr) -> Expr: ... + +def reverse(string: Expr) -> Expr: ... + +def right(string: Expr, n: Expr) -> Expr: ... + +def round(*args: Expr) -> Expr: ... + +def rpad(*args: Expr) -> Expr: ... + +def rtrim(*args: Expr) -> Expr: ... + +def sha224(input_arg1: Expr) -> Expr: ... + +def sha256(input_arg1: Expr) -> Expr: ... + +def sha384(input_arg1: Expr) -> Expr: ... + +def sha512(input_arg1: Expr) -> Expr: ... + +def signum(num: Expr) -> Expr: ... + +def sin(num: Expr) -> Expr: ... + +def sinh(num: Expr) -> Expr: ... + +def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr: ... + +def sqrt(num: Expr) -> Expr: ... + +def starts_with(string: Expr, prefix: Expr) -> Expr: ... + +def strpos(string: Expr, substring: Expr) -> Expr: ... + +def substr(string: Expr, position: Expr) -> Expr: ... + +def substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr: ... + +def substring(string: Expr, position: Expr, length: Expr) -> Expr: ... + +def find_in_set(string: Expr, string_list: Expr) -> Expr: ... + +def tan(num: Expr) -> Expr: ... + +def tanh(num: Expr) -> Expr: ... + +def to_hex(arg1: Expr) -> Expr: ... + +def now() -> Expr: ... + +def to_timestamp(*args: Expr) -> Expr: ... + +def to_timestamp_millis(*args: Expr) -> Expr: ... + +def to_timestamp_micros(*args: Expr) -> Expr: ... + +def to_timestamp_seconds(*args: Expr) -> Expr: ... + +def to_unixtime(*args: Expr) -> Expr: ... + +def current_date() -> Expr: ... + +def current_time() -> Expr: ... + +def date_part(part: Expr, date: Expr) -> Expr: ... + +def date_trunc(part: Expr, date: Expr) -> Expr: ... + +def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr: ... + +def make_date(year: Expr, month: Expr, day: Expr) -> Expr: ... + +def translate(string: Expr, from_: Expr, to: Expr) -> Expr: ... + +def trim(*args: Expr) -> Expr: ... + +def trunc(*args: Expr) -> Expr: ... + +def upper(arg1: Expr) -> Expr: ... + +def uuid() -> Expr: ... + +def struct(*args: Expr) -> Expr: ... + +def named_struct(*args: Expr) -> Expr: ... + +def from_unixtime(unixtime: Expr) -> Expr: ... + +def arrow_typeof(arg_1: Expr) -> Expr: ... + +def arrow_cast(arg_1: Expr, datatype: Expr) -> Expr: ... + +def random() -> Expr: ... + +def array_append(array: Expr, element: Expr) -> Expr: ... + +def array_to_string(array: Expr, delimiter: Expr) -> Expr: ... + +def array_dims(array: Expr) -> Expr: ... + +def array_distinct(array: Expr) -> Expr: ... + +def array_element(array: Expr, element: Expr) -> Expr: ... + +def array_empty(array: Expr) -> Expr: ... + +def array_length(array: Expr) -> Expr: ... + +def array_has(first_array: Expr, second_array: Expr) -> Expr: ... + +def array_has_all(first_array: Expr, second_array: Expr) -> Expr: ... + +def array_has_any(first_array: Expr, second_array: Expr) -> Expr: ... + +def array_positions(array: Expr, element: Expr) -> Expr: ... + +def array_ndims(array: Expr) -> Expr: ... + +def array_prepend(element: Expr, array: Expr) -> Expr: ... + +def array_pop_back(array: Expr) -> Expr: ... + +def array_pop_front(array: Expr) -> Expr: ... + +def array_remove(array: Expr, element: Expr) -> Expr: ... + +def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: ... + +def array_remove_all(array: Expr, element: Expr) -> Expr: ... + +def array_repeat(element: Expr, count: Expr) -> Expr: ... + +def array_replace(array: Expr, from_: Expr, to: Expr) -> Expr: ... + +def array_replace_n(array: Expr, from_: Expr, to: Expr, mx: Expr) -> Expr: ... + +def array_replace_all(array: Expr, from_: Expr, to: Expr) -> Expr: ... + +def array_sort(array: Expr, desc: Expr, null_first: Expr) -> Expr: ... + +def array_intersect(first_array: Expr, second_array: Expr) -> Expr: ... + +def array_union(array1: Expr, array2: Expr) -> Expr: ... + +def array_except(first_array: Expr, second_array: Expr) -> Expr: ... + +def array_resize(array: Expr, size: Expr, value: Expr) -> Expr: ... + +def cardinality(array: Expr) -> Expr: ... + +def flatten(array: Expr) -> Expr: ... + +def range(start: Expr, stop: Expr, step: Expr) -> Expr: ... + +class AggregateFunction(Protocol): + def __call__(self, exp: Expr, *, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: + ... + +class AggregateFunctionYX(Protocol): + def __call__(self, y: Expr, x: Expr, *, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: + ... + +array_agg: AggregateFunction +max: AggregateFunction +min: AggregateFunction +avg: AggregateFunction +sum: AggregateFunction +bit_and: AggregateFunction +bit_or: AggregateFunction +bit_xor: AggregateFunction +bool_and: AggregateFunction +bool_or: AggregateFunction +corr: AggregateFunctionYX +count: AggregateFunction +covar_samp: AggregateFunctionYX +covar_pop: AggregateFunctionYX +median: AggregateFunction +regr_slope: AggregateFunctionYX +regr_intercept: AggregateFunctionYX +regr_count: AggregateFunctionYX +regr_r2: AggregateFunctionYX +regr_avgx: AggregateFunctionYX +regr_avgy: AggregateFunctionYX +regr_sxx: AggregateFunctionYX +regr_syy: AggregateFunctionYX +regr_sxy: AggregateFunctionYX +stddev: AggregateFunction +stddev_pop: AggregateFunction +var_sample: AggregateFunction +var_pop: AggregateFunction +approx_distinct: AggregateFunction +approx_median: AggregateFunction + +def approx_percentile_cont(expression: Expr, percentile: float, num_centroids: Optional[int] = None, filter: Optional[Expr] = None) -> Expr: ... + +def approx_percentile_cont_with_weight( + expression: Expr, + weight: Expr, + percentile: float, + filter: Optional[Expr] = None, +) -> Expr: ... + +last_value: AggregateFunction + +def first_value(expr: Expr, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: ... + +def nth_value(expr: Expr, n: int, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: + ... + +def string_agg(expr: Expr, delimiter: str, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: + ... + +def lead(arg: Expr, shift_offset: int, default_value: Optional[Any] = None, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: + ... + +def lag(arg: Expr, shift_offset: int, default_value: Optional[Any] = None, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: + ... + +class WindowFunction(Protocol): + def __call__(self, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: + ... + +row_number: WindowFunction +rank: WindowFunction +dense_rank: WindowFunction +percent_rank: WindowFunction +cume_dist: WindowFunction + +def ntile(arg: Expr, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: + ... + diff --git a/python/datafusion/_internal/object_store/__init__.pyi b/python/datafusion/_internal/object_store/__init__.pyi new file mode 100644 index 000000000..d700b77dd --- /dev/null +++ b/python/datafusion/_internal/object_store/__init__.pyi @@ -0,0 +1,43 @@ +from typing import List, Optional, Tuple + + +class LocalFileSystem: + def __init__(self, prefix: Optional[str] = None) -> None: ... + +class MicrosoftAzure: + def __init__( + self, + container_name: str, + account: Optional[str] = None, + access_key: Optional[str] = None, + bearer_token: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + tenant_id: Optional[str] = None, + sas_query_pairs: Optional[List[Tuple[str, str]]] = None, + use_emulator: Optional[bool] = None, + allow_http: Optional[bool] = None + ) -> None: ... + +class GoogleCloud: + def __init__( + self, + bucket_name: str, + service_account_path: Optional[str] = None, + ) -> None: ... + +class AmazonS3: + def __init__( + self, + bucket_name: str, + region: Optional[str] = None, + access_key_id: Optional[str] = None, + secret_access_key: Optional[str] = None, + endpoint: Optional[str] = None, + allow_http: bool = False, + imdsv1_fallback: bool = False, + ) -> None: ... + +class Http: + def __init__(self, url: str) -> None: ... + diff --git a/python/datafusion/_internal/substrait/__init__.pyi b/python/datafusion/_internal/substrait/__init__.pyi new file mode 100644 index 000000000..3c68732b2 --- /dev/null +++ b/python/datafusion/_internal/substrait/__init__.pyi @@ -0,0 +1,37 @@ +from .. import SessionContext, LogicalPlan + + +class Plan: + def encode(self) -> bytes: + ... + +class Serde: + @staticmethod + def serialize(sql: str, ctx: SessionContext, path: str): + ... + + @staticmethod + def serialize_to_plan(sql: str, ctx: SessionContext) -> Plan: + ... + + @staticmethod + def serialize_bytes(sql: str, ctx: SessionContext) -> bytes: + ... + + @staticmethod + def deserialize(path: str) -> Plan: + ... + + @staticmethod + def deserialize_bytes(proto_bytes: bytes) -> Plan: + ... + +class Producer: + @staticmethod + def to_substrait_plan(plan: LogicalPlan, ctx: SessionContext) -> Plan: + ... + +class Consumer: + @staticmethod + def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: + ... diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index acd28f33d..5a38c5727 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -73,4 +73,4 @@ def schema(self) -> pyarrow.Schema: @property def kind(self) -> str: """Returns the kind of table.""" - return self.table.kind() + return self.table.kind diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 3c284c9f9..b899e9d67 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -30,6 +30,8 @@ from datafusion.record_batch import RecordBatchStream from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF +import pyarrow.dataset + from typing import Any, TYPE_CHECKING, Protocol from typing_extensions import deprecated @@ -487,10 +489,10 @@ def __init__( ctx = SessionContext() df = ctx.read_csv("data.csv") """ - config = config.config_internal if config is not None else None - runtime = runtime.config_internal if runtime is not None else None + config_internal = config.config_internal if config is not None else None + runtime_internal = runtime.config_internal if runtime is not None else None - self.ctx = SessionContextInternal(config, runtime) + self.ctx = SessionContextInternal(config_internal, runtime_internal) def enable_url_table(self) -> "SessionContext": """Control if local files can be queried as tables. @@ -710,7 +712,7 @@ def register_table(self, name: str, table: Table) -> None: name: Name of the resultant table. table: DataFusion table to add to the session context. """ - self.ctx.register_table(name, table) + self.ctx.register_table(name, table.table) def deregister_table(self, name: str) -> None: """Remove a table from the session.""" @@ -749,7 +751,7 @@ def register_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -780,7 +782,7 @@ def register_parquet( file_extension, skip_metadata, schema, - file_sort_order, + [[expr.raw_sort for expr in exprs] for exprs in file_sort_order] if file_sort_order is not None else None, ) def register_csv( @@ -814,13 +816,13 @@ def register_csv( file_compression_type: File compression type. """ if isinstance(path, list): - path = [str(p) for p in path] + path_inner = [str(p) for p in path] else: - path = str(path) + path_inner = str(path) self.ctx.register_csv( name, - path, + path_inner, schema, has_header, delimiter, @@ -916,7 +918,7 @@ def register_udwf(self, udwf: WindowUDF) -> None: def catalog(self, name: str = "datafusion") -> Catalog: """Retrieve a catalog by name.""" - return self.ctx.catalog(name) + return Catalog(self.ctx.catalog(name)) @deprecated( "Use the catalog provider interface ``SessionContext.Catalog`` to " @@ -1013,11 +1015,11 @@ def read_csv( if table_partition_cols is None: table_partition_cols = [] - path = [str(p) for p in path] if isinstance(path, list) else str(path) + path_inner = [str(p) for p in path] if isinstance(path, list) else str(path) return DataFrame( self.ctx.read_csv( - path, + path_inner, schema, has_header, delimiter, @@ -1036,7 +1038,7 @@ def read_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -1060,6 +1062,11 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] + file_sort_order_raw = ( + [sort_list_to_raw_sort_list(f) for f in file_sort_order] + if file_sort_order is not None + else None + ) return DataFrame( self.ctx.read_parquet( str(path), @@ -1068,7 +1075,7 @@ def read_parquet( file_extension, skip_metadata, schema, - file_sort_order, + file_sort_order_raw, ) ) @@ -1103,7 +1110,7 @@ def read_table(self, table: Table) -> DataFrame: :py:class:`~datafusion.catalog.ListingTable`, create a :py:class:`~datafusion.dataframe.DataFrame`. """ - return DataFrame(self.ctx.read_table(table)) + return DataFrame(self.ctx.read_table(table.table)) def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: """Execute the ``plan`` and return the results.""" diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 0b38db924..017a51829 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -34,6 +34,7 @@ from typing import Callable, Sequence from datafusion._internal import DataFrame as DataFrameInternal +from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -200,7 +201,7 @@ def with_columns( def _simplify_expression( *exprs: Expr | Iterable[Expr], **named_exprs: Expr - ) -> list[Expr]: + ) -> list[expr_internal.Expr]: expr_list = [] for expr in exprs: if isinstance(expr, Expr): @@ -251,9 +252,9 @@ def aggregate( group_by = group_by if isinstance(group_by, list) else [group_by] aggs = aggs if isinstance(aggs, list) else [aggs] - group_by = [e.expr for e in group_by] - aggs = [e.expr for e in aggs] - return DataFrame(self.df.aggregate(group_by, aggs)) + group_by_inner = [e.expr for e in group_by] + aggs_inner = [e.expr for e in aggs] + return DataFrame(self.df.aggregate(group_by_inner, aggs_inner)) def sort(self, *exprs: Expr | SortExpr) -> DataFrame: """Sort the DataFrame by the specified sorting expressions. @@ -451,8 +452,8 @@ def join( raise ValueError( "`left_on` or `right_on` should not provided with `on`" ) - left_on = on - right_on = on + left_on = on # type: ignore + right_on = on # type: ignore elif left_on is not None or right_on is not None: if left_on is None or right_on is None: raise ValueError("`left_on` and `right_on` should both be provided.") @@ -465,7 +466,7 @@ def join( if isinstance(right_on, str): right_on = [right_on] - return DataFrame(self.df.join(right.df, how, left_on, right_on)) + return DataFrame(self.df.join(right.df, how, left_on, right_on)) # type: ignore def join_on( self, @@ -551,8 +552,8 @@ def repartition_by_hash(self, *exprs: Expr, num: int) -> DataFrame: Returns: Repartitioned DataFrame. """ - exprs = [expr.expr for expr in exprs] - return DataFrame(self.df.repartition_by_hash(*exprs, num=num)) + exprs_inner = [expr.expr for expr in exprs] + return DataFrame(self.df.repartition_by_hash(*exprs_inner, num=num)) def union(self, other: DataFrame, distinct: bool = False) -> DataFrame: """Calculate the union of two :py:class:`DataFrame`. @@ -724,8 +725,8 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram Returns: A DataFrame with the columns expanded. """ - columns = [c for c in columns] - return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls)) + columns_inner = [c for c in columns] + return DataFrame(self.df.unnest_columns(columns_inner, preserve_nulls=preserve_nulls)) def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any: """Export an Arrow PyCapsule Stream. diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 16add16f4..6b8163aff 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -171,14 +171,14 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: """Helper function to return a default Sort if an Expr is provided.""" if isinstance(e, SortExpr): return e.raw_sort - return SortExpr(e.expr, True, True).raw_sort + return SortExpr(e, True, True).raw_sort def sort_list_to_raw_sort_list( - sort_list: Optional[list[Expr | SortExpr]], -) -> Optional[list[expr_internal.SortExpr]]: + sort_list: list[Expr | SortExpr], +) -> list[expr_internal.SortExpr]: """Helper function to return an optional sort list to raw variant.""" - return [sort_or_default(e) for e in sort_list] if sort_list is not None else None + return [sort_or_default(e) for e in sort_list] class Expr: @@ -226,7 +226,7 @@ def variant_name(self) -> str: def __richcmp__(self, other: Expr, op: int) -> Expr: """Comparison operator.""" - return Expr(self.expr.__richcmp__(other, op)) + return Expr(self.expr.__richcmp__(other.expr, op)) def __repr__(self) -> str: """Generate a string representation of this expression.""" @@ -306,23 +306,23 @@ def __getitem__(self, key: str | int) -> Expr: ) return Expr(self.expr.__getitem__(key)) - def __eq__(self, rhs: Any) -> Expr: + def __eq__(self, rhs: Any) -> Expr: # type: ignore """Equal to. Accepts either an expression or any valid PyArrow scalar literal value. """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__eq__(rhs.expr)) + return Expr(self.expr.__eq__(rhs.expr)) # type: ignore - def __ne__(self, rhs: Any) -> Expr: + def __ne__(self, rhs: Any) -> Expr: # type: ignore """Not equal to. Accepts either an expression or any valid PyArrow scalar literal value. """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__ne__(rhs.expr)) + return Expr(self.expr.__ne__(rhs.expr)) # type: ignore def __ge__(self, rhs: Any) -> Expr: """Greater than or equal to. @@ -331,7 +331,7 @@ def __ge__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__ge__(rhs.expr)) + return Expr(self.expr.__ge__(rhs.expr)) # type: ignore def __gt__(self, rhs: Any) -> Expr: """Greater than. @@ -340,7 +340,7 @@ def __gt__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__gt__(rhs.expr)) + return Expr(self.expr.__gt__(rhs.expr)) # type: ignore def __le__(self, rhs: Any) -> Expr: """Less than or equal to. @@ -349,7 +349,7 @@ def __le__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__le__(rhs.expr)) + return Expr(self.expr.__le__(rhs.expr)) # type: ignore def __lt__(self, rhs: Any) -> Expr: """Less than. @@ -358,7 +358,7 @@ def __lt__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__lt__(rhs.expr)) + return Expr(self.expr.__lt__(rhs.expr)) # type: ignore __radd__ = __add__ __rand__ = __and__ @@ -412,7 +412,7 @@ def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: ascending: If true, sort in ascending order. nulls_first: Return null values first. """ - return SortExpr(self.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(self, ascending=ascending, nulls_first=nulls_first) def is_null(self) -> Expr: """Returns ``True`` if this expression is null.""" @@ -584,7 +584,7 @@ def over(self, window: Window) -> Expr: window: Window definition """ partition_by_raw = expr_list_to_raw_expr_list(window._partition_by) - order_by_raw = sort_list_to_raw_sort_list(window._order_by) + order_by_raw = sort_list_to_raw_sort_list(window._order_by) if window._order_by else None window_frame_raw = ( window._window_frame.window_frame if window._window_frame is not None @@ -784,7 +784,7 @@ class SortExpr: def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: """This constructor should not be called by the end user.""" - self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) + self.raw_sort = expr_internal.SortExpr(expr.expr, ascending, nulls_first) def expr(self) -> Expr: """Return the raw expr backing the SortExpr.""" diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index c0097c6ab..440bc1fc6 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -332,8 +332,8 @@ def list_join(expr: Expr, delimiter: Expr) -> Expr: def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr: """Returns whether the argument is contained within the list ``values``.""" - values = [v.expr for v in values] - return Expr(f.in_list(arg.expr, values, negated)) + values_inner = [v.expr for v in values] + return Expr(f.in_list(arg.expr, values_inner, negated)) def digest(value: Expr, method: Expr) -> Expr: @@ -350,8 +350,8 @@ def concat(*args: Expr) -> Expr: NULL arguments are ignored. """ - args = [arg.expr for arg in args] - return Expr(f.concat(args)) + args_inner = [arg.expr for arg in args] + return Expr(f.concat(args_inner)) def concat_ws(separator: str, *args: Expr) -> Expr: @@ -359,13 +359,13 @@ def concat_ws(separator: str, *args: Expr) -> Expr: ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. """ - args = [arg.expr for arg in args] - return Expr(f.concat_ws(separator, args)) + args_inner = [arg.expr for arg in args] + return Expr(f.concat_ws(separator, args_inner)) def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a new sort expression.""" - return SortExpr(expr.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) def alias(expr: Expr, name: str) -> Expr: @@ -428,12 +428,12 @@ def window( df.select(functions.lag(col("a")).partition_by(col("b")).build()) """ - args = [a.expr for a in args] - partition_by = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - window_frame = window_frame.window_frame if window_frame is not None else None - ctx = ctx.ctx if ctx is not None else None - return Expr(f.window(name, args, partition_by, order_by_raw, window_frame, ctx)) + args_inner = [a.expr for a in args] + partition_by_inner = expr_list_to_raw_expr_list(partition_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + window_frame_inner = window_frame.window_frame if window_frame is not None else None + ctx_inner = ctx.ctx if ctx is not None else None + return Expr(f.window(name, args_inner, partition_by_inner, order_by_raw, window_frame_inner, ctx_inner)) # scalar functions @@ -536,8 +536,8 @@ def chr(arg: Expr) -> Expr: def coalesce(*args: Expr) -> Expr: """Returns the value of the first expr in ``args`` which is not NULL.""" - args = [arg.expr for arg in args] - return Expr(f.coalesce(*args)) + args_inner = [arg.expr for arg in args] + return Expr(f.coalesce(*args_inner)) def cos(arg: Expr) -> Expr: @@ -745,8 +745,10 @@ def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: false otherwise. """ if flags is not None: - flags = flags.expr - return Expr(f.regexp_like(string.expr, regex.expr, flags)) + flags_inner = flags.expr + else: + flags_inner = None + return Expr(f.regexp_like(string.expr, regex.expr, flags_inner)) def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: @@ -756,8 +758,10 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: corresponding index in ``regex`` to string in ``string``. """ if flags is not None: - flags = flags.expr - return Expr(f.regexp_match(string.expr, regex.expr, flags)) + flags_inner = flags.expr + else: + flags_inner = None + return Expr(f.regexp_match(string.expr, regex.expr, flags_inner)) def regexp_replace( @@ -772,8 +776,10 @@ def regexp_replace( """ if flags is not None: - flags = flags.expr - return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) + flags_inner = flags.expr + else: + flags_inner = None + return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags_inner)) def repeat(string: Expr, n: Expr) -> Expr: @@ -932,8 +938,8 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: if formatters is None: return f.to_timestamp(arg.expr) - formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp(arg.expr, *formatters)) + formatters_inner = [f.expr for f in formatters] + return Expr(f.to_timestamp(arg.expr, *formatters_inner)) def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: @@ -941,7 +947,9 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - return Expr(f.to_timestamp_millis(arg.expr, *formatters)) + + formatters_inner = [f.expr for f in formatters] + return Expr(f.to_timestamp_millis(arg.expr, *formatters_inner)) def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: @@ -949,15 +957,8 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - return Expr(f.to_timestamp_micros(arg.expr, *formatters)) - - -def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: - """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. - - See :py:func:`to_timestamp` for a description on how to use formatters. - """ - return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) + formatters_inner = [f.expr for f in formatters] + return Expr(f.to_timestamp_micros(arg.expr, *formatters_inner)) def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: @@ -965,7 +966,8 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) + formatters_inner = [f.expr for f in formatters] + return Expr(f.to_timestamp_seconds(arg.expr, *formatters_inner)) def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr: @@ -1052,8 +1054,8 @@ def upper(arg: Expr) -> Expr: def make_array(*args: Expr) -> Expr: """Returns an array using the specified input expressions.""" - args = [arg.expr for arg in args] - return Expr(f.make_array(args)) + args_inner = [arg.expr for arg in args] + return Expr(f.make_array(args_inner)) def make_list(*args: Expr) -> Expr: @@ -1077,15 +1079,15 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr: return Expr(f.range(start.expr, stop.expr, step.expr)) -def uuid(arg: Expr) -> Expr: +def uuid() -> Expr: """Returns uuid v4 as a string value.""" - return Expr(f.uuid(arg.expr)) + return Expr(f.uuid()) def struct(*args: Expr) -> Expr: """Returns a struct with the given arguments.""" - args = [arg.expr for arg in args] - return Expr(f.struct(*args)) + args_inner = [arg.expr for arg in args] + return Expr(f.struct(*args_inner)) def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: @@ -1096,8 +1098,8 @@ def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: ] # flatten - name_pairs = [x.expr for xs in name_pair_exprs for x in xs] - return Expr(f.named_struct(*name_pairs)) + name_pairs_inner = [x.expr for xs in name_pair_exprs for x in xs] + return Expr(f.named_struct(*name_pairs_inner)) def from_unixtime(arg: Expr) -> Expr: @@ -1151,8 +1153,8 @@ def list_push_back(array: Expr, element: Expr) -> Expr: def array_concat(*args: Expr) -> Expr: """Concatenates the input arrays.""" - args = [arg.expr for arg in args] - return Expr(f.array_concat(args)) + args_inner = [arg.expr for arg in args] + return Expr(f.array_concat(args_inner)) def array_cat(*args: Expr) -> Expr: @@ -1496,8 +1498,10 @@ def array_slice( ) -> Expr: """Returns a slice of the array.""" if stride is not None: - stride = stride.expr - return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride)) + stride_inner = stride.expr + else: + stride_inner = None + return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride_inner)) def list_slice(array: Expr, begin: Expr, end: Expr, stride: Expr | None = None) -> Expr: @@ -1707,7 +1711,7 @@ def array_agg( filter: If provided, only compute against rows for which the filter is True order_by: Order the resultant array values """ - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None filter_raw = filter.expr if filter is not None else None return Expr( @@ -2205,7 +2209,7 @@ def first_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None filter_raw = filter.expr if filter is not None else None return Expr( @@ -2237,7 +2241,7 @@ def last_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None filter_raw = filter.expr if filter is not None else None return Expr( @@ -2271,7 +2275,7 @@ def nth_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None filter_raw = filter.expr if filter is not None else None return Expr( @@ -2412,7 +2416,7 @@ def lead( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.lead( @@ -2464,7 +2468,7 @@ def lag( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.lag( @@ -2503,7 +2507,7 @@ def row_number( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.row_number( @@ -2544,7 +2548,7 @@ def rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.rank( @@ -2580,7 +2584,7 @@ def dense_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.dense_rank( @@ -2617,7 +2621,7 @@ def percent_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.percent_rank( @@ -2654,7 +2658,7 @@ def cume_dist( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.cume_dist( @@ -2695,7 +2699,7 @@ def ntile( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None return Expr( f.ntile( @@ -2727,7 +2731,7 @@ def string_agg( filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate """ - order_by_raw = sort_list_to_raw_sort_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None filter_raw = filter.expr if filter is not None else None return Expr( diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index b274539fc..d97ae735b 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -37,12 +37,12 @@ def is_correct_input(self, input_item: Any, table_name: str, **kwargs): def build_table( self, - input_file: str, + input_item: str, table_name: str, **kwargs, ) -> SqlTable: """Create a table from the input source.""" - _, extension = os.path.splitext(input_file) + _, extension = os.path.splitext(input_item) format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] @@ -50,7 +50,7 @@ def build_table( import pyarrow.parquet as pq # Read the Parquet metadata - metadata = pq.read_metadata(input_file) + metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable for col in metadata.schema: @@ -69,7 +69,7 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. - with open(input_file, "r") as file: + with open(input_item, "r") as file: reader = csv.reader(file) header_row = next(reader) print(header_row) @@ -84,6 +84,6 @@ def build_table( ) # Input could possibly be multiple files. Create a list if so - input_files = glob.glob(input_file) + input_files = glob.glob(input_item) return SqlTable(table_name, columns, num_rows, input_files) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index d9d994b22..7e4dd68f1 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -84,7 +84,7 @@ class ScalarUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[..., _R], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: _R, @@ -157,7 +157,7 @@ def state(self) -> List[pyarrow.Scalar]: pass @abstractmethod - def update(self, *values: pyarrow.Array) -> None: + def update(self, values: pyarrow.Array) -> None: """Evaluate an array of values and update state.""" pass @@ -181,7 +181,7 @@ class AggregateUDF: def __init__( self, - name: Optional[str], + name: str, accumulator: Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, @@ -270,12 +270,13 @@ def sum_bias_10() -> Summarize: """ # noqa W505 if not callable(accum): raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): + if not isinstance(accum.__call__(), Accumulator): # type: ignore raise TypeError( "Accumulator must implement the abstract base class Accumulator" ) if name is None: - name = accum.__call__().__class__.__qualname__.lower() + name = accum.__call__().__class__.__qualname__.lower() # type: ignore + assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] return AggregateUDF( @@ -461,7 +462,7 @@ class WindowUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, @@ -532,12 +533,13 @@ def bias_10() -> BiasedNumbers: """ # noqa W505 if not callable(func): raise TypeError("`func` must be callable.") - if not isinstance(func.__call__(), WindowEvaluator): + if not isinstance(func.__call__(), WindowEvaluator): # type: ignore raise TypeError( "`func` must implement the abstract base class WindowEvaluator" ) if name is None: - name = func.__call__().__class__.__qualname__.lower() + name = func.__call__().__class__.__qualname__.lower() # type: ignore + assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] return WindowUDF( From a933fdbd742d378ea6ced69225095f7017249204 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 12 Jan 2025 15:13:43 +0800 Subject: [PATCH 2/9] update license --- python/datafusion/_internal/__init__.pyi | 65 ++++++++++++------- python/datafusion/_internal/common.pyi | 43 ++++++++---- python/datafusion/_internal/expr/__init__.pyi | 19 +++++- .../datafusion/_internal/expr/aggregate.pyi | 17 +++++ .../_internal/expr/aggregate_expr.pyi | 17 +++++ python/datafusion/_internal/expr/alias.pyi | 17 +++++ python/datafusion/_internal/expr/analyze.pyi | 19 +++++- python/datafusion/_internal/expr/base.pyi | 29 +++++++-- python/datafusion/_internal/expr/between.pyi | 17 +++++ .../datafusion/_internal/expr/binary_expr.pyi | 18 ++++- .../datafusion/_internal/expr/bool_expr.pyi | 17 +++++ python/datafusion/_internal/expr/case.pyi | 21 +++++- python/datafusion/_internal/expr/cast.pyi | 17 +++++ python/datafusion/_internal/expr/column.pyi | 17 +++++ .../_internal/expr/conditional_expr.pyi | 18 ++++- .../_internal/expr/create_memory_table.pyi | 19 +++++- .../datafusion/_internal/expr/create_view.pyi | 18 ++++- python/datafusion/_internal/expr/distinct.pyi | 18 ++++- .../datafusion/_internal/expr/drop_table.pyi | 17 +++++ .../_internal/expr/empty_relation.pyi | 18 ++++- python/datafusion/_internal/expr/exists.pyi | 19 +++++- python/datafusion/_internal/expr/explain.pyi | 17 +++++ .../datafusion/_internal/expr/extension.pyi | 19 +++++- python/datafusion/_internal/expr/filter.pyi | 19 +++++- .../_internal/expr/grouping_set.pyi | 19 +++++- python/datafusion/_internal/expr/in_list.pyi | 19 +++++- .../datafusion/_internal/expr/in_subquery.pyi | 18 ++++- python/datafusion/_internal/expr/join.pyi | 19 +++++- python/datafusion/_internal/expr/like.pyi | 17 +++++ python/datafusion/_internal/expr/limit.pyi | 17 +++++ python/datafusion/_internal/expr/literal.pyi | 21 +++++- .../datafusion/_internal/expr/placeholder.pyi | 19 +++++- .../datafusion/_internal/expr/projection.pyi | 18 ++++- .../datafusion/_internal/expr/repartition.pyi | 19 +++++- .../_internal/expr/scalar_subquery.pyi | 19 +++++- .../_internal/expr/scalar_variable.pyi | 17 +++++ python/datafusion/_internal/expr/sort.pyi | 19 +++++- .../datafusion/_internal/expr/sort_expr.pyi | 17 +++++ python/datafusion/_internal/expr/subquery.pyi | 21 +++++- .../_internal/expr/subquery_alias.pyi | 19 +++++- .../datafusion/_internal/expr/table_scan.pyi | 17 +++++ python/datafusion/_internal/expr/union.pyi | 19 +++++- python/datafusion/_internal/expr/unnest.pyi | 19 +++++- .../datafusion/_internal/expr/unnest_expr.pyi | 19 +++++- python/datafusion/_internal/expr/window.pyi | 19 +++++- .../_internal/functions/__init__.pyi | 17 +++++ .../_internal/object_store/__init__.pyi | 21 +++++- .../_internal/substrait/__init__.pyi | 21 +++++- 48 files changed, 887 insertions(+), 78 deletions(-) diff --git a/python/datafusion/_internal/__init__.pyi b/python/datafusion/_internal/__init__.pyi index 2a47b0d23..e2dd7af98 100644 --- a/python/datafusion/_internal/__init__.pyi +++ b/python/datafusion/_internal/__init__.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Set, Tuple import pyarrow as pa @@ -92,7 +109,7 @@ class SessionContext: def enable_url_table(self) -> SessionContext: ... def register_object_store(self, schema: str, storage: Any, host: Optional[str] = None): ... - + def register_listing_table(self, name: str, path: str, table_partition_cols: List[Tuple[str, str]] = [], file_extension: str = ".parquet", schema: Optional[pa.Schema] = None, file_sort_order: Optional[List[List[SortExpr]]] = None): ... def sql(self, query: str) -> DataFrame: ... @@ -122,9 +139,9 @@ class SessionContext: def register_record_batches(self, name: str, partitions: List[List[pa.RecordBatch]]): ... def register_parquet( - self, - name: str, - path: str, + self, + name: str, + path: str, table_partition_cols: List[Tuple[str, str]] = [], parquet_pruning: bool = True, file_extension: str = ".parquet", @@ -134,7 +151,7 @@ class SessionContext: **kwargs ): ... - + def register_csv( self, name: str, @@ -198,7 +215,7 @@ class SessionContext: def session_id(self) -> str: ... def read_json( - self, + self, path: str | List[str], schema: Optional[pa.Schema] = None, schema_infer_max_records: int = 1000, @@ -210,7 +227,7 @@ class SessionContext: ... def read_csv( - self, + self, path: str | List[str], schema: Optional[pa.Schema] = None, head_header: bool = True, @@ -224,7 +241,7 @@ class SessionContext: ... def read_parquet( - self, + self, path: str | List[str], table_partition_cols: List[Tuple[str, str]] = [], parquet_pruning: bool = True, @@ -245,12 +262,12 @@ class SessionContext: **kwargs ): ... - + def read_table(self, table: Table) -> DataFrame: ... def execute(self, plan: ExecutionPlan, part: int) -> RecordBatchStream: ... - + class DataFrame: def __getitem__(self, key: str | List[str] | Tuple[str, ...]) -> DataFrame: ... @@ -306,7 +323,7 @@ class DataFrame: def repartition(self, num: int) -> DataFrame: ... def repartition_by_hash(self, *args: Expr, num: int) -> DataFrame: ... - + def union(self, py_df: DataFrame, distinct: bool = False) -> DataFrame: ... def union_distinct(self, py_df: DataFrame) -> DataFrame: ... @@ -355,7 +372,7 @@ class ScalarUDF: return_type: pa.DataType, volatility: str ) -> None: ... - + def __call__(self, *args: Expr) -> Expr: ... class AggregateUDF: @@ -368,7 +385,7 @@ class AggregateUDF: state_type: List[pa.DataType], volatility: str, ) -> None: ... - + def __call__(self, *args: Expr) -> Expr: ... class WindowUDF: @@ -380,7 +397,7 @@ class WindowUDF: return_type: pa.DataType, volatility: str, ) -> None: ... - + def __call__(self, *args: Expr) -> Expr: ... class Config: @@ -394,18 +411,18 @@ class Config: def set(self, key: str, value: object): ... - + def get_all(self) -> Dict[str, Optional[str]]: ... - - + + class LogicalPlan: def to_variant(self) -> Any: ... - + def inputs(self) -> List[LogicalPlan]: ... - + def display(self) -> str: ... @@ -414,10 +431,10 @@ class LogicalPlan: def display_indent_schema(self) -> str: ... - + def display_graphviz(self) -> str: ... - + def to_proto(self) -> bytes: ... @@ -431,7 +448,7 @@ class ExecutionPlan: def display(self) -> str: ... - + def display_indent(self) -> str: ... @@ -441,7 +458,7 @@ class ExecutionPlan: @staticmethod def from_proto(ctx: SessionContext, proto_msg: bytes) -> ExecutionPlan: ... - + @property def partition_count(self) -> int: ... @@ -459,7 +476,7 @@ class RecordBatchStream: async def __anext__(self) -> RecordBatch: ... - + def __iter__(self) -> RecordBatch: ... diff --git a/python/datafusion/_internal/common.pyi b/python/datafusion/_internal/common.pyi index 902d6753b..c0835cfc7 100644 --- a/python/datafusion/_internal/common.pyi +++ b/python/datafusion/_internal/common.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from typing import List, Optional, Tuple @@ -5,7 +22,7 @@ class DFSchema: @staticmethod def empty() -> DFSchema: ... - + def field_names(self) -> List[str]: ... @@ -111,13 +128,13 @@ class DataTypeMap: @property def python_type(self) -> PythonType: ... - + @python_type.setter def python_type(self, python_type: PythonType): ... @property def sql_type(self) -> SqlType: ... - + @sql_type.setter def sql_type(self, sql_type: SqlType): ... @@ -139,28 +156,28 @@ class SqlSchema: @property def name(self) -> str: ... - + @name.setter def name(self, name: str): ... @property def tables(self) -> List[SqlTable]: ... - + @tables.setter def tables(self, tables: List[SqlTable]): ... @property def views(self) -> List[SqlView]: ... - + @views.setter def views(self, views: List[SqlView]): ... @property def functions(self) -> List[SqlFunction]: ... - + @functions.setter def functions(self, functions: List[SqlFunction]): ... - + class SqlTable: def __init__( @@ -173,13 +190,13 @@ class SqlTable: @property def name(self) -> str: ... - + @name.setter def name(self, name: str): ... @property def columns(self) -> List[Tuple[str, DataTypeMap]]: ... - + @columns.setter def columns(self, columns: List[Tuple[str, DataTypeMap]]): ... @@ -224,13 +241,13 @@ class SqlView: @property def name(self) -> str: ... - + @name.setter def name(self, name: str): ... @property def definition(self) -> str: ... - + @definition.setter def definition(self, definition: str): ... @@ -242,4 +259,4 @@ class SqlStatistics: class SqlFunction: - ... \ No newline at end of file + ... diff --git a/python/datafusion/_internal/expr/__init__.pyi b/python/datafusion/_internal/expr/__init__.pyi index 72a9297eb..2fdc741e0 100644 --- a/python/datafusion/_internal/expr/__init__.pyi +++ b/python/datafusion/_internal/expr/__init__.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr as Expr, ExprFuncBuilder as ExprFuncBuilder from .column import Column as Column from .literal import Literal as Literal @@ -40,4 +57,4 @@ from .sort_expr import SortExpr as SortExpr from .subquery_alias import SubqueryAlias as SubqueryAlias from .drop_table import DropTable as DropTable from .repartition import Partitioning as Partitioning, Repartition as Repartition -from .window import WindowExpr as WindowExpr, WindowFrame as WindowFrame, WindowFrameBound as WindowFrameBound \ No newline at end of file +from .window import WindowExpr as WindowExpr, WindowFrame as WindowFrame, WindowFrameBound as WindowFrameBound diff --git a/python/datafusion/_internal/expr/aggregate.pyi b/python/datafusion/_internal/expr/aggregate.pyi index faec01280..a1bc6d989 100644 --- a/python/datafusion/_internal/expr/aggregate.pyi +++ b/python/datafusion/_internal/expr/aggregate.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema diff --git a/python/datafusion/_internal/expr/aggregate_expr.pyi b/python/datafusion/_internal/expr/aggregate_expr.pyi index e543dcae5..12207bcc8 100644 --- a/python/datafusion/_internal/expr/aggregate_expr.pyi +++ b/python/datafusion/_internal/expr/aggregate_expr.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .base import Expr diff --git a/python/datafusion/_internal/expr/alias.pyi b/python/datafusion/_internal/expr/alias.pyi index 6106b6067..02acd669c 100644 --- a/python/datafusion/_internal/expr/alias.pyi +++ b/python/datafusion/_internal/expr/alias.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr class Alias: diff --git a/python/datafusion/_internal/expr/analyze.pyi b/python/datafusion/_internal/expr/analyze.pyi index f253b042a..203afaab4 100644 --- a/python/datafusion/_internal/expr/analyze.pyi +++ b/python/datafusion/_internal/expr/analyze.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema @@ -8,4 +25,4 @@ class Analyze: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... \ No newline at end of file + def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/base.pyi b/python/datafusion/_internal/expr/base.pyi index 845cd1c8b..f368ec74c 100644 --- a/python/datafusion/_internal/expr/base.pyi +++ b/python/datafusion/_internal/expr/base.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any, List, Optional import pyarrow as pa @@ -13,10 +30,10 @@ class Expr: def schema_name(self) -> str: ... - + def canonical_name(self) -> str: ... - + def variant_name(self) -> str: ... @@ -65,7 +82,7 @@ class Expr: def python_value(self) -> Any: ... def rex_call_operands(self) -> List[Expr]: ... - + def rex_call_operator(self) -> str: ... def column_name(self, plan: LogicalPlan) -> str: ... @@ -83,10 +100,10 @@ class Expr: def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... def over( - self, - partition_by: Optional[List[Expr]] = None, + self, + partition_by: Optional[List[Expr]] = None, window_frame: Optional[WindowFrame] = None, - order_by: Optional[List[SortExpr]] = None, + order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[NullTreatment] = None) -> Expr: ... diff --git a/python/datafusion/_internal/expr/between.pyi b/python/datafusion/_internal/expr/between.pyi index 265010ad5..2c79e698d 100644 --- a/python/datafusion/_internal/expr/between.pyi +++ b/python/datafusion/_internal/expr/between.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr diff --git a/python/datafusion/_internal/expr/binary_expr.pyi b/python/datafusion/_internal/expr/binary_expr.pyi index f179ce3ec..bf0149b82 100644 --- a/python/datafusion/_internal/expr/binary_expr.pyi +++ b/python/datafusion/_internal/expr/binary_expr.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr @@ -7,4 +24,3 @@ class BinaryExpr: def right(self) -> Expr: ... def on(self) -> str: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/bool_expr.pyi b/python/datafusion/_internal/expr/bool_expr.pyi index b5e35b2e2..8eebf534d 100644 --- a/python/datafusion/_internal/expr/bool_expr.pyi +++ b/python/datafusion/_internal/expr/bool_expr.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr diff --git a/python/datafusion/_internal/expr/case.pyi b/python/datafusion/_internal/expr/case.pyi index f1490ce51..bf5ac1ccd 100644 --- a/python/datafusion/_internal/expr/case.pyi +++ b/python/datafusion/_internal/expr/case.pyi @@ -1,10 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List, Optional, Tuple from .base import Expr class Case: def expr(self) -> Optional[Expr]: ... - + def when_then_expr(self) -> List[Tuple[Expr, Expr]]: ... - def else_expr(self) -> Optional[Expr]: ... \ No newline at end of file + def else_expr(self) -> Optional[Expr]: ... diff --git a/python/datafusion/_internal/expr/cast.pyi b/python/datafusion/_internal/expr/cast.pyi index bf18e64f1..75e372ad0 100644 --- a/python/datafusion/_internal/expr/cast.pyi +++ b/python/datafusion/_internal/expr/cast.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr from ..common import DataType diff --git a/python/datafusion/_internal/expr/column.pyi b/python/datafusion/_internal/expr/column.pyi index 7e5ff7876..57be204b6 100644 --- a/python/datafusion/_internal/expr/column.pyi +++ b/python/datafusion/_internal/expr/column.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Optional class Column: diff --git a/python/datafusion/_internal/expr/conditional_expr.pyi b/python/datafusion/_internal/expr/conditional_expr.pyi index 01639fa57..a55ae352d 100644 --- a/python/datafusion/_internal/expr/conditional_expr.pyi +++ b/python/datafusion/_internal/expr/conditional_expr.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr @@ -7,4 +24,3 @@ class CaseBuilder: def otherwise(self, else_expr: Expr) -> Expr: ... def end(self) -> Expr: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/create_memory_table.pyi b/python/datafusion/_internal/expr/create_memory_table.pyi index fadb1cc58..10cadfe88 100644 --- a/python/datafusion/_internal/expr/create_memory_table.pyi +++ b/python/datafusion/_internal/expr/create_memory_table.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan @@ -7,4 +24,4 @@ class CreateMemoryTable: def input(self) -> List[LogicalPlan]: ... def if_not_exists(self) -> bool: ... def or_replace(self) -> bool: ... - def __name__(self) -> str: ... \ No newline at end of file + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/create_view.pyi b/python/datafusion/_internal/expr/create_view.pyi index 31ab77a81..deec726a5 100644 --- a/python/datafusion/_internal/expr/create_view.pyi +++ b/python/datafusion/_internal/expr/create_view.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List, Optional from .. import LogicalPlan @@ -8,4 +25,3 @@ class CreateView: def or_replace(self) -> bool: ... def definition(self) -> Optional[str]: ... def __name__(self) -> str: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/distinct.pyi b/python/datafusion/_internal/expr/distinct.pyi index 9a234a3bf..747be4e2a 100644 --- a/python/datafusion/_internal/expr/distinct.pyi +++ b/python/datafusion/_internal/expr/distinct.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan @@ -6,4 +23,3 @@ class Distinct: def input(self) -> List[LogicalPlan]: ... def __name__(self) -> str: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/drop_table.pyi b/python/datafusion/_internal/expr/drop_table.pyi index 8fe647b4e..94b084ab0 100644 --- a/python/datafusion/_internal/expr/drop_table.pyi +++ b/python/datafusion/_internal/expr/drop_table.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan diff --git a/python/datafusion/_internal/expr/empty_relation.pyi b/python/datafusion/_internal/expr/empty_relation.pyi index 048e5a23e..2b3902ad8 100644 --- a/python/datafusion/_internal/expr/empty_relation.pyi +++ b/python/datafusion/_internal/expr/empty_relation.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ..common import DFSchema @@ -7,4 +24,3 @@ class EmptyRelation: def schema(self) -> DFSchema: ... def __name__(self) -> str: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/exists.pyi b/python/datafusion/_internal/expr/exists.pyi index 0980b70a9..75e8f3432 100644 --- a/python/datafusion/_internal/expr/exists.pyi +++ b/python/datafusion/_internal/expr/exists.pyi @@ -1,7 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .subquery import Subquery class Exists: def subquery(self) -> Subquery: ... - def negated(self) -> bool: ... \ No newline at end of file + def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/explain.pyi b/python/datafusion/_internal/expr/explain.pyi index 6feae6401..98ac70fcf 100644 --- a/python/datafusion/_internal/expr/explain.pyi +++ b/python/datafusion/_internal/expr/explain.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema diff --git a/python/datafusion/_internal/expr/extension.pyi b/python/datafusion/_internal/expr/extension.pyi index 3da0c6d1e..552d9ed1f 100644 --- a/python/datafusion/_internal/expr/extension.pyi +++ b/python/datafusion/_internal/expr/extension.pyi @@ -1,2 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + class Extension: - def name(self) -> str: ... \ No newline at end of file + def name(self) -> str: ... diff --git a/python/datafusion/_internal/expr/filter.pyi b/python/datafusion/_internal/expr/filter.pyi index a5ba1332b..fc7b148a6 100644 --- a/python/datafusion/_internal/expr/filter.pyi +++ b/python/datafusion/_internal/expr/filter.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .base import Expr from .. import LogicalPlan @@ -9,4 +26,4 @@ class Filter: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... \ No newline at end of file + def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/grouping_set.pyi b/python/datafusion/_internal/expr/grouping_set.pyi index 061c34866..4c2632e1c 100644 --- a/python/datafusion/_internal/expr/grouping_set.pyi +++ b/python/datafusion/_internal/expr/grouping_set.pyi @@ -1,2 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + class GroupingSet: - ... \ No newline at end of file + ... diff --git a/python/datafusion/_internal/expr/in_list.pyi b/python/datafusion/_internal/expr/in_list.pyi index 98c8c362c..0469880db 100644 --- a/python/datafusion/_internal/expr/in_list.pyi +++ b/python/datafusion/_internal/expr/in_list.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .base import Expr @@ -6,4 +23,4 @@ class InList: def list(self) -> List[Expr]: ... - def negated(self) -> bool: ... \ No newline at end of file + def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/in_subquery.pyi b/python/datafusion/_internal/expr/in_subquery.pyi index dda995df2..400bad460 100644 --- a/python/datafusion/_internal/expr/in_subquery.pyi +++ b/python/datafusion/_internal/expr/in_subquery.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr from .subquery import Subquery @@ -8,4 +25,3 @@ class InSubquery: def subquery(self) -> Subquery: ... def negated(self) -> bool: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/join.pyi b/python/datafusion/_internal/expr/join.pyi index 8819dd27b..f0c4692f8 100644 --- a/python/datafusion/_internal/expr/join.pyi +++ b/python/datafusion/_internal/expr/join.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List, Optional, Tuple from .. import LogicalPlan from .base import Expr @@ -13,7 +30,7 @@ class JoinConstraint: class Join: def left(self) -> LogicalPlan: ... - + def right(self) -> LogicalPlan: ... def on(self) -> List[Tuple[Expr, Expr]]: ... diff --git a/python/datafusion/_internal/expr/like.pyi b/python/datafusion/_internal/expr/like.pyi index 6daf940d5..78f955551 100644 --- a/python/datafusion/_internal/expr/like.pyi +++ b/python/datafusion/_internal/expr/like.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Optional from .base import Expr diff --git a/python/datafusion/_internal/expr/limit.pyi b/python/datafusion/_internal/expr/limit.pyi index bb3c5d691..d8e111398 100644 --- a/python/datafusion/_internal/expr/limit.pyi +++ b/python/datafusion/_internal/expr/limit.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema diff --git a/python/datafusion/_internal/expr/literal.pyi b/python/datafusion/_internal/expr/literal.pyi index 2600e2f5b..ac767277b 100644 --- a/python/datafusion/_internal/expr/literal.pyi +++ b/python/datafusion/_internal/expr/literal.pyi @@ -1,9 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Optional, Tuple class Literal: def data_type(self) -> str: ... - + def value_f32(self) -> Optional[float]: ... def value_f64(self) -> Optional[float]: ... @@ -19,7 +36,7 @@ class Literal: def value_i64(self) -> Optional[int]: ... def value_u8(self) -> Optional[int]: ... - + def value_u16(self) -> Optional[int]: ... def value_u32(self) -> Optional[int]: ... diff --git a/python/datafusion/_internal/expr/placeholder.pyi b/python/datafusion/_internal/expr/placeholder.pyi index 811c9e2fb..e62fa8128 100644 --- a/python/datafusion/_internal/expr/placeholder.pyi +++ b/python/datafusion/_internal/expr/placeholder.pyi @@ -1,6 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ..common import DataType class Placeholder: def id(self) -> str: ... - def data_type(self) -> DataType: ... \ No newline at end of file + def data_type(self) -> DataType: ... diff --git a/python/datafusion/_internal/expr/projection.pyi b/python/datafusion/_internal/expr/projection.pyi index 42d4621e3..c71252d48 100644 --- a/python/datafusion/_internal/expr/projection.pyi +++ b/python/datafusion/_internal/expr/projection.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .base import Expr from .. import LogicalPlan @@ -12,4 +29,3 @@ class Projection: def schema(self) -> DFSchema: ... def __name__(self) -> str: ... - \ No newline at end of file diff --git a/python/datafusion/_internal/expr/repartition.pyi b/python/datafusion/_internal/expr/repartition.pyi index 00b6f4033..5cbcc9f18 100644 --- a/python/datafusion/_internal/expr/repartition.pyi +++ b/python/datafusion/_internal/expr/repartition.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from .base import Expr @@ -11,4 +28,4 @@ class Repartition: def __name__(self) -> str: ... class Partitioning: - ... \ No newline at end of file + ... diff --git a/python/datafusion/_internal/expr/scalar_subquery.pyi b/python/datafusion/_internal/expr/scalar_subquery.pyi index 3146859b0..11ff0949f 100644 --- a/python/datafusion/_internal/expr/scalar_subquery.pyi +++ b/python/datafusion/_internal/expr/scalar_subquery.pyi @@ -1,5 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .subquery import Subquery class ScalarSubquery: - def subquery(self) -> Subquery: ... \ No newline at end of file + def subquery(self) -> Subquery: ... diff --git a/python/datafusion/_internal/expr/scalar_variable.pyi b/python/datafusion/_internal/expr/scalar_variable.pyi index ae0022953..490d31174 100644 --- a/python/datafusion/_internal/expr/scalar_variable.pyi +++ b/python/datafusion/_internal/expr/scalar_variable.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from ..common import DataType diff --git a/python/datafusion/_internal/expr/sort.pyi b/python/datafusion/_internal/expr/sort.pyi index 12f899c86..437ffc2d0 100644 --- a/python/datafusion/_internal/expr/sort.pyi +++ b/python/datafusion/_internal/expr/sort.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List, Optional from .. import LogicalPlan @@ -12,4 +29,4 @@ class Sort: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... \ No newline at end of file + def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/sort_expr.pyi b/python/datafusion/_internal/expr/sort_expr.pyi index c8d93bc4f..af5720aec 100644 --- a/python/datafusion/_internal/expr/sort_expr.pyi +++ b/python/datafusion/_internal/expr/sort_expr.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr diff --git a/python/datafusion/_internal/expr/subquery.pyi b/python/datafusion/_internal/expr/subquery.pyi index 74768a2c1..07847e92e 100644 --- a/python/datafusion/_internal/expr/subquery.pyi +++ b/python/datafusion/_internal/expr/subquery.pyi @@ -1,9 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan class Subquery: def input(self) -> List[LogicalPlan]: ... - + def __name__(self) -> str: - ... \ No newline at end of file + ... diff --git a/python/datafusion/_internal/expr/subquery_alias.pyi b/python/datafusion/_internal/expr/subquery_alias.pyi index 164c05aab..7de080931 100644 --- a/python/datafusion/_internal/expr/subquery_alias.pyi +++ b/python/datafusion/_internal/expr/subquery_alias.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema @@ -10,4 +27,4 @@ class SubqueryAlias: def alias(self) -> str: ... - def __name__(self) -> str: ... \ No newline at end of file + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/table_scan.pyi b/python/datafusion/_internal/expr/table_scan.pyi index 36d80dc1b..5f28aefe4 100644 --- a/python/datafusion/_internal/expr/table_scan.pyi +++ b/python/datafusion/_internal/expr/table_scan.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List, Optional, Tuple from ..common import DFSchema from .base import Expr diff --git a/python/datafusion/_internal/expr/union.pyi b/python/datafusion/_internal/expr/union.pyi index 051f90edd..52e67fee3 100644 --- a/python/datafusion/_internal/expr/union.pyi +++ b/python/datafusion/_internal/expr/union.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema @@ -8,4 +25,4 @@ class Union: def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... \ No newline at end of file + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/unnest.pyi b/python/datafusion/_internal/expr/unnest.pyi index 13252e213..b2ff582c2 100644 --- a/python/datafusion/_internal/expr/unnest.pyi +++ b/python/datafusion/_internal/expr/unnest.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from .. import LogicalPlan from ..common import DFSchema @@ -8,4 +25,4 @@ class Unnest: def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... \ No newline at end of file + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/unnest_expr.pyi b/python/datafusion/_internal/expr/unnest_expr.pyi index 50e08808b..7052f9ae7 100644 --- a/python/datafusion/_internal/expr/unnest_expr.pyi +++ b/python/datafusion/_internal/expr/unnest_expr.pyi @@ -1,7 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .base import Expr class UnnestExpr: def expr(self) -> Expr: ... - def __name__(self) -> str: ... \ No newline at end of file + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/window.pyi b/python/datafusion/_internal/expr/window.pyi index 03cac176c..e839c87f6 100644 --- a/python/datafusion/_internal/expr/window.pyi +++ b/python/datafusion/_internal/expr/window.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any, List, Optional from ..common import DFSchema from .base import Expr @@ -27,7 +44,7 @@ class WindowFrame: start_bound: Optional[Any], end_bound: Optional[Any], ) -> None: ... - + def get_frame_units(self) -> str: ... def get_lower_bound(self) -> WindowFrameBound: ... diff --git a/python/datafusion/_internal/functions/__init__.pyi b/python/datafusion/_internal/functions/__init__.pyi index 97b523db1..a383ef2ed 100644 --- a/python/datafusion/_internal/functions/__init__.pyi +++ b/python/datafusion/_internal/functions/__init__.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any, List, Optional, Protocol from ..expr import Expr, SortExpr, CaseBuilder, WindowFrame from .. import SessionContext diff --git a/python/datafusion/_internal/object_store/__init__.pyi b/python/datafusion/_internal/object_store/__init__.pyi index d700b77dd..fa7cbc21d 100644 --- a/python/datafusion/_internal/object_store/__init__.pyi +++ b/python/datafusion/_internal/object_store/__init__.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List, Optional, Tuple @@ -25,7 +42,7 @@ class GoogleCloud: bucket_name: str, service_account_path: Optional[str] = None, ) -> None: ... - + class AmazonS3: def __init__( self, @@ -37,7 +54,7 @@ class AmazonS3: allow_http: bool = False, imdsv1_fallback: bool = False, ) -> None: ... - + class Http: def __init__(self, url: str) -> None: ... diff --git a/python/datafusion/_internal/substrait/__init__.pyi b/python/datafusion/_internal/substrait/__init__.pyi index 3c68732b2..37b0e7398 100644 --- a/python/datafusion/_internal/substrait/__init__.pyi +++ b/python/datafusion/_internal/substrait/__init__.pyi @@ -1,15 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from .. import SessionContext, LogicalPlan class Plan: def encode(self) -> bytes: ... - + class Serde: @staticmethod def serialize(sql: str, ctx: SessionContext, path: str): ... - + @staticmethod def serialize_to_plan(sql: str, ctx: SessionContext) -> Plan: ... From 9897b63ad2bc9eea1393ed36d215b78996ba6ee6 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 12 Jan 2025 15:16:25 +0800 Subject: [PATCH 3/9] format --- python/datafusion/_internal/expr/__init__.pyi | 1 - python/datafusion/_internal/functions/__init__.pyi | 1 - python/datafusion/functions.py | 1 - 3 files changed, 3 deletions(-) diff --git a/python/datafusion/_internal/expr/__init__.pyi b/python/datafusion/_internal/expr/__init__.pyi index 2fdc741e0..ef21c0ba6 100644 --- a/python/datafusion/_internal/expr/__init__.pyi +++ b/python/datafusion/_internal/expr/__init__.pyi @@ -17,7 +17,6 @@ from .base import Expr as Expr, ExprFuncBuilder as ExprFuncBuilder from .column import Column as Column -from .literal import Literal as Literal from .binary_expr import BinaryExpr as BinaryExpr from .literal import Literal as Literal from .aggregate_expr import AggregateFunction as AggregateFunction diff --git a/python/datafusion/_internal/functions/__init__.pyi b/python/datafusion/_internal/functions/__init__.pyi index a383ef2ed..3807d715d 100644 --- a/python/datafusion/_internal/functions/__init__.pyi +++ b/python/datafusion/_internal/functions/__init__.pyi @@ -18,7 +18,6 @@ from typing import Any, List, Optional, Protocol from ..expr import Expr, SortExpr, CaseBuilder, WindowFrame from .. import SessionContext -from ...common import NullTreatment def in_list(expr: Expr, value: List[Expr], negated: bool) -> Expr: ... diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 440bc1fc6..50d8c6a18 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -947,7 +947,6 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters_inner = [f.expr for f in formatters] return Expr(f.to_timestamp_millis(arg.expr, *formatters_inner)) From 768b6c074a624c0390a5db5d17450f51a3f678e1 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 12 Jan 2025 15:17:51 +0800 Subject: [PATCH 4/9] format --- python/datafusion/__init__.py | 2 +- python/datafusion/_internal/__init__.pyi | 344 ++++++------------ python/datafusion/_internal/common.pyi | 75 +--- python/datafusion/_internal/expr/__init__.pyi | 19 +- .../datafusion/_internal/expr/aggregate.pyi | 7 - .../_internal/expr/aggregate_expr.pyi | 3 - python/datafusion/_internal/expr/alias.pyi | 1 - python/datafusion/_internal/expr/analyze.pyi | 3 - python/datafusion/_internal/expr/base.pyi | 63 +--- python/datafusion/_internal/expr/between.pyi | 4 - .../datafusion/_internal/expr/binary_expr.pyi | 3 - .../datafusion/_internal/expr/bool_expr.pyi | 1 - python/datafusion/_internal/expr/case.pyi | 3 - python/datafusion/_internal/expr/cast.pyi | 8 +- python/datafusion/_internal/expr/column.pyi | 3 - .../_internal/expr/conditional_expr.pyi | 3 - .../_internal/expr/create_memory_table.pyi | 1 - .../datafusion/_internal/expr/create_view.pyi | 1 - python/datafusion/_internal/expr/distinct.pyi | 2 - .../datafusion/_internal/expr/drop_table.pyi | 4 - .../_internal/expr/empty_relation.pyi | 3 - python/datafusion/_internal/expr/exists.pyi | 2 - python/datafusion/_internal/expr/explain.pyi | 6 - python/datafusion/_internal/expr/filter.pyi | 3 - .../_internal/expr/grouping_set.pyi | 3 +- python/datafusion/_internal/expr/in_list.pyi | 2 - .../datafusion/_internal/expr/in_subquery.pyi | 3 - python/datafusion/_internal/expr/join.pyi | 14 +- python/datafusion/_internal/expr/like.pyi | 11 - python/datafusion/_internal/expr/limit.pyi | 2 - python/datafusion/_internal/expr/literal.pyi | 21 -- .../datafusion/_internal/expr/placeholder.pyi | 1 - .../datafusion/_internal/expr/projection.pyi | 4 - .../datafusion/_internal/expr/repartition.pyi | 4 +- .../_internal/expr/scalar_subquery.pyi | 1 - .../_internal/expr/scalar_variable.pyi | 8 +- python/datafusion/_internal/expr/sort.pyi | 4 - .../datafusion/_internal/expr/sort_expr.pyi | 4 - python/datafusion/_internal/expr/subquery.pyi | 7 +- .../_internal/expr/subquery_alias.pyi | 4 - .../datafusion/_internal/expr/table_scan.pyi | 7 - python/datafusion/_internal/expr/union.pyi | 3 - python/datafusion/_internal/expr/unnest.pyi | 3 - .../datafusion/_internal/expr/unnest_expr.pyi | 2 - python/datafusion/_internal/expr/window.pyi | 19 +- .../_internal/functions/__init__.pyi | 259 +++++-------- .../_internal/object_store/__init__.pyi | 10 +- .../_internal/substrait/__init__.pyi | 29 +- python/datafusion/context.py | 4 +- python/datafusion/dataframe.py | 10 +- python/datafusion/expr.py | 20 +- python/datafusion/functions.py | 71 +++- python/datafusion/udf.py | 8 +- 53 files changed, 328 insertions(+), 774 deletions(-) diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index c73d424dd..399ef07cf 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -24,7 +24,7 @@ try: import importlib.metadata as importlib_metadata except ImportError: - import importlib_metadata # type: ignore + import importlib_metadata # type: ignore from .context import ( SessionContext, diff --git a/python/datafusion/_internal/__init__.pyi b/python/datafusion/_internal/__init__.pyi index e2dd7af98..773f5c45e 100644 --- a/python/datafusion/_internal/__init__.pyi +++ b/python/datafusion/_internal/__init__.pyi @@ -27,117 +27,103 @@ from .expr import SortExpr, Expr class Catalog: def names(self) -> List[str]: ... - def database(self, name: str = "public") -> Database: ... - class Database: def names(self) -> Set[str]: ... - def table(self, name: str) -> Table: ... class Table: @property - def schema(self) -> pa.Schema: - ... - + def schema(self) -> pa.Schema: ... @property - def kind(self) -> str: - ... + def kind(self) -> str: ... class SessionConfig: def __init__(self, config_options: Optional[Dict[str, str]] = None) -> None: ... - - def with_create_default_catalog_and_schema(self, enabled: bool) -> SessionConfig: ... - - def with_default_catalog_and_schema(self, catalog: str, schema: str) -> SessionConfig: ... - + def with_create_default_catalog_and_schema( + self, enabled: bool + ) -> SessionConfig: ... + def with_default_catalog_and_schema( + self, catalog: str, schema: str + ) -> SessionConfig: ... def with_information_schema(self, enabled: bool) -> SessionConfig: ... - def with_batch_size(self, batch_size: int) -> SessionConfig: ... - def with_target_partitions(self, target_partitions: int) -> SessionConfig: ... - def with_repartition_aggregations(self, enabled: bool) -> SessionConfig: ... - def with_repartition_joins(self, enabled: bool) -> SessionConfig: ... - def with_repartition_windows(self, enabled: bool) -> SessionConfig: ... - def with_repartition_sorts(self, enabled: bool) -> SessionConfig: ... - def with_repartition_file_scans(self, enabled: bool) -> SessionConfig: ... - def with_repartition_file_min_size(self, size: int) -> SessionConfig: ... - def with_parquet_pruning(self, enabled: bool) -> SessionConfig: ... - def set(self, key: str, value: str) -> SessionConfig: ... - class RuntimeEnvBuilder: def __init__(self) -> None: ... - def with_disk_manager_disabled(self) -> RuntimeEnvBuilder: ... - def with_disk_manager_os(self) -> RuntimeEnvBuilder: ... - def with_disk_manager_specified(self, paths: List[str]) -> RuntimeEnvBuilder: ... - def with_unbounded_memory_pool(self) -> RuntimeEnvBuilder: ... - def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder: ... - def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder: ... - def with_temp_file_path(self, path: str) -> RuntimeEnvBuilder: ... - class SQLOptions: def __init__(self) -> None: ... - def with_allow_ddl(self, allow: bool) -> SQLOptions: ... - def with_allow_dml(self, allow: bool) -> SQLOptions: ... - def with_allow_statements(self, allow: bool) -> SQLOptions: ... - class SessionContext: - def __init__(self, config: Optional[SessionConfig] = None, runtime: Optional[RuntimeEnvBuilder] = None) -> None: ... - + def __init__( + self, + config: Optional[SessionConfig] = None, + runtime: Optional[RuntimeEnvBuilder] = None, + ) -> None: ... def enable_url_table(self) -> SessionContext: ... - - def register_object_store(self, schema: str, storage: Any, host: Optional[str] = None): ... - - def register_listing_table(self, name: str, path: str, table_partition_cols: List[Tuple[str, str]] = [], file_extension: str = ".parquet", schema: Optional[pa.Schema] = None, file_sort_order: Optional[List[List[SortExpr]]] = None): ... - + def register_object_store( + self, schema: str, storage: Any, host: Optional[str] = None + ): ... + def register_listing_table( + self, + name: str, + path: str, + table_partition_cols: List[Tuple[str, str]] = [], + file_extension: str = ".parquet", + schema: Optional[pa.Schema] = None, + file_sort_order: Optional[List[List[SortExpr]]] = None, + ): ... def sql(self, query: str) -> DataFrame: ... - - def sql_with_options(self, query: str, options: Optional[SQLOptions] = None) -> DataFrame: ... - - def create_dataframe(self, partitions: List[List[pa.RecordBatch]], name: Optional[str] = None, schema: Optional[pa.Schema] = None) -> DataFrame: ... - + def sql_with_options( + self, query: str, options: Optional[SQLOptions] = None + ) -> DataFrame: ... + def create_dataframe( + self, + partitions: List[List[pa.RecordBatch]], + name: Optional[str] = None, + schema: Optional[pa.Schema] = None, + ) -> DataFrame: ... def create_dataframe_from_logical_plan(self, plan: LogicalPlan) -> DataFrame: ... - def from_pylist(self, data: list, name: Optional[str] = None) -> DataFrame: ... - def from_pydict(self, data: dict, name: Optional[str] = None) -> DataFrame: ... - - def from_arrow(self, data: ArrowStreamExportable | pa.RecordBatchReader, name: Optional[str] = None) -> DataFrame: ... - - def from_pandas(self, data: pd.DataFrame, name: Optional[str] = None) -> DataFrame: ... - - def from_polars(self, data: pl.DataFrame, name: Optional[str] = None) -> DataFrame: ... - + def from_arrow( + self, + data: ArrowStreamExportable | pa.RecordBatchReader, + name: Optional[str] = None, + ) -> DataFrame: ... + def from_pandas( + self, data: pd.DataFrame, name: Optional[str] = None + ) -> DataFrame: ... + def from_polars( + self, data: pl.DataFrame, name: Optional[str] = None + ) -> DataFrame: ... def register_table(self, data: str, table: Table): ... - def deregister_table(self, name: str): ... - def register_table_provider(self, name: str, provider: TableProviderExportable): ... - - def register_record_batches(self, name: str, partitions: List[List[pa.RecordBatch]]): ... - + def register_record_batches( + self, name: str, partitions: List[List[pa.RecordBatch]] + ): ... def register_parquet( self, name: str, @@ -148,10 +134,8 @@ class SessionContext: skip_metadata: bool = True, schema: Optional[pa.Schema] = None, file_sort_order: Optional[List[List[SortExpr]]] = None, - **kwargs - ): - ... - + **kwargs, + ): ... def register_csv( self, name: str, @@ -162,10 +146,8 @@ class SessionContext: schema_infer_max_records: int = 1000, file_extension: str = ".csv", file_compression_type: Optional[str] = None, - **kwargs - ): - ... - + **kwargs, + ): ... def register_json( self, name: str, @@ -175,10 +157,8 @@ class SessionContext: file_extension: str = ".json", table_partition_cols: List[Tuple[str, str]] = [], file_compression_type: Optional[str] = None, - **kwargs - ): - ... - + **kwargs, + ): ... def register_avro( self, name: str, @@ -186,34 +166,17 @@ class SessionContext: schema: Optional[pa.Schema] = None, file_extension: str = ".avro", table_partition_cols: List[Tuple[str, str]] = [], - ): - ... - - def register_dataset( - self, - name: str, - dataset: Dataset - ): - ... - + ): ... + def register_dataset(self, name: str, dataset: Dataset): ... def register_udf(self, udf: ScalarUDF): ... - def register_udaf(self, udaf: AggregateUDF): ... - def register_udwf(self, udwf: WindowUDF): ... - def catalog(self, name: str = "datafusion") -> Catalog: ... - def tables(self) -> Set[str]: ... - def table(self, name: str) -> DataFrame: ... - def table_exist(self, name: str) -> bool: ... - def empty_table(self) -> DataFrame: ... - def session_id(self) -> str: ... - def read_json( self, path: str | List[str], @@ -222,10 +185,8 @@ class SessionContext: file_extension: str = ".json", table_partition_cols: List[Tuple[str, str]] = [], file_compression_type: Optional[str] = None, - **kwargs - ): - ... - + **kwargs, + ): ... def read_csv( self, path: str | List[str], @@ -236,10 +197,8 @@ class SessionContext: file_extension: str = ".csv", table_partition_cols: List[Tuple[str, str]] = [], file_compression_type: Optional[str] = None, - **kwargs - ): - ... - + **kwargs, + ): ... def read_parquet( self, path: str | List[str], @@ -249,120 +208,77 @@ class SessionContext: skip_metadata: bool = True, schema: Optional[pa.Schema] = None, file_sort_order: Optional[List[List[SortExpr]]] = None, - **kwargs - ): - ... - + **kwargs, + ): ... def read_avro( self, path: str, schema: Optional[pa.Schema] = None, table_partition_cols: List[Tuple[str, str]] = [], file_extension: str = ".avro", - **kwargs - ): - ... - + **kwargs, + ): ... def read_table(self, table: Table) -> DataFrame: ... - - def execute(self, plan: ExecutionPlan, part: int) -> RecordBatchStream: - ... - + def execute(self, plan: ExecutionPlan, part: int) -> RecordBatchStream: ... class DataFrame: def __getitem__(self, key: str | List[str] | Tuple[str, ...]) -> DataFrame: ... - def _repr_html_(self) -> str: ... - def describe(self) -> DataFrame: ... - def schema(self) -> pa.Schema: ... - def select_columns(self, *args: str) -> DataFrame: ... - def select(self, *args: Expr) -> DataFrame: ... - def drop(self, *args: str) -> DataFrame: ... - def filter(self, predicate: Expr) -> DataFrame: ... - def with_column(self, name: str, expr: Expr) -> DataFrame: ... - def with_columns(self, exprs: List[Expr]) -> DataFrame: ... - def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: ... - def aggregate(self, group_by: List[Expr], aggs: List[Expr]) -> DataFrame: ... - def sort(self, *exprs: SortExpr) -> DataFrame: ... - def limit(self, count: int, offset: int) -> DataFrame: ... - def collect(self) -> List[pa.RecordBatch]: ... - def cache(self) -> DataFrame: ... - def collect_partitioned(self) -> List[List[pa.RecordBatch]]: ... - def show(self, num: int = 20): ... - def distinct(self) -> DataFrame: ... - - def join(self, right: DataFrame, how: str, left_on: List[str], right_on: List[str]) -> DataFrame: ... - - def join_on(self, right: DataFrame, on_exprs: List[Expr], how: str) -> DataFrame: ... - + def join( + self, right: DataFrame, how: str, left_on: List[str], right_on: List[str] + ) -> DataFrame: ... + def join_on( + self, right: DataFrame, on_exprs: List[Expr], how: str + ) -> DataFrame: ... def explain(self, verbose: bool = False, analyze: bool = False): ... - def logical_plan(self) -> LogicalPlan: ... - def optimized_logical_plan(self) -> LogicalPlan: ... - def execution_plan(self) -> ExecutionPlan: ... - def repartition(self, num: int) -> DataFrame: ... - def repartition_by_hash(self, *args: Expr, num: int) -> DataFrame: ... - def union(self, py_df: DataFrame, distinct: bool = False) -> DataFrame: ... - def union_distinct(self, py_df: DataFrame) -> DataFrame: ... - def unnest_column(self, column: str, preserve_nulls: bool = True) -> DataFrame: ... - - def unnest_columns(self, columns: List[str], preserve_nulls: bool = True) -> DataFrame: ... - + def unnest_columns( + self, columns: List[str], preserve_nulls: bool = True + ) -> DataFrame: ... def intersect(self, py_df: DataFrame) -> DataFrame: ... - def except_all(self, py_df: DataFrame) -> DataFrame: ... - def write_csv(self, path: str, with_header: bool): ... - - def write_parquet(self, path: str, compression: str = "uncompressed", compression_level: Optional[int] = None): ... - + def write_parquet( + self, + path: str, + compression: str = "uncompressed", + compression_level: Optional[int] = None, + ): ... def write_json(self, path: str): ... - def to_arrow_table(self) -> pa.Table: ... - - def __arrow_c_stream__( - self, requested_schema: object | None = None - ) -> object: ... - + def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ... def execute_stream(self) -> RecordBatchStream: ... - def execute_stream_partitioned(self) -> List[RecordBatchStream]: ... - def to_pandas(self) -> pd.DataFrame: ... - def to_pylist(self) -> list: ... - def to_pydict(self) -> dict: ... - def to_polars(self) -> pl.DataFrame: ... - def count(self) -> int: ... - class ScalarUDF: def __init__( self, @@ -370,9 +286,8 @@ class ScalarUDF: func: Callable[..., pa.DataType], input_types: List[pa.DataType], return_type: pa.DataType, - volatility: str - ) -> None: ... - + volatility: str, + ) -> None: ... def __call__(self, *args: Expr) -> Expr: ... class AggregateUDF: @@ -384,8 +299,7 @@ class AggregateUDF: return_type: pa.DataType, state_type: List[pa.DataType], volatility: str, - ) -> None: ... - + ) -> None: ... def __call__(self, *args: Expr) -> Expr: ... class WindowUDF: @@ -396,90 +310,44 @@ class WindowUDF: input_types: List[pa.DataType], return_type: pa.DataType, volatility: str, - ) -> None: ... - + ) -> None: ... def __call__(self, *args: Expr) -> Expr: ... class Config: def __init__(self) -> None: ... - @staticmethod def from_env() -> Config: ... - - def get(self, key: str) -> Optional[str]: - ... - - def set(self, key: str, value: object): - ... - - def get_all(self) -> Dict[str, Optional[str]]: - ... - + def get(self, key: str) -> Optional[str]: ... + def set(self, key: str, value: object): ... + def get_all(self) -> Dict[str, Optional[str]]: ... class LogicalPlan: - def to_variant(self) -> Any: - ... - - def inputs(self) -> List[LogicalPlan]: - ... - - def display(self) -> str: - ... - - def display_indent(self) -> str: - ... - - def display_indent_schema(self) -> str: - ... - - def display_graphviz(self) -> str: - ... - - def to_proto(self) -> bytes: - ... - + def to_variant(self) -> Any: ... + def inputs(self) -> List[LogicalPlan]: ... + def display(self) -> str: ... + def display_indent(self) -> str: ... + def display_indent_schema(self) -> str: ... + def display_graphviz(self) -> str: ... + def to_proto(self) -> bytes: ... @staticmethod - def from_proto(ctx: SessionContext, proto_msg: bytes) -> LogicalPlan: - ... + def from_proto(ctx: SessionContext, proto_msg: bytes) -> LogicalPlan: ... class ExecutionPlan: - def children(self) -> List[ExecutionPlan]: - ... - - def display(self) -> str: - ... - - def display_indent(self) -> str: - ... - - def to_proto(self) -> bytes: - ... - + def children(self) -> List[ExecutionPlan]: ... + def display(self) -> str: ... + def display_indent(self) -> str: ... + def to_proto(self) -> bytes: ... @staticmethod - def from_proto(ctx: SessionContext, proto_msg: bytes) -> ExecutionPlan: - ... - + def from_proto(ctx: SessionContext, proto_msg: bytes) -> ExecutionPlan: ... @property def partition_count(self) -> int: ... - class RecordBatch: def to_pyarrow(self) -> pa.RecordBatch: ... - class RecordBatchStream: - def next(self) -> RecordBatch: - ... - - def __next__(self) -> RecordBatch: - ... - - async def __anext__(self) -> RecordBatch: - ... - - def __iter__(self) -> RecordBatch: - ... - - async def __aiter__(self) -> RecordBatch: - ... - + def next(self) -> RecordBatch: ... + def __next__(self) -> RecordBatch: ... + async def __anext__(self) -> RecordBatch: ... + def __iter__(self) -> RecordBatch: ... + async def __aiter__(self) -> RecordBatch: ... diff --git a/python/datafusion/_internal/common.pyi b/python/datafusion/_internal/common.pyi index c0835cfc7..bbb0226c6 100644 --- a/python/datafusion/_internal/common.pyi +++ b/python/datafusion/_internal/common.pyi @@ -20,15 +20,10 @@ from typing import List, Optional, Tuple class DFSchema: @staticmethod - def empty() -> DFSchema: - ... - - def field_names(self) -> List[str]: - ... - -class DataType: - ... + def empty() -> DFSchema: ... + def field_names(self) -> List[str]: ... +class DataType: ... class RexType(enum.IntEnum): Alias = 0 @@ -38,7 +33,6 @@ class RexType(enum.IntEnum): ScalarSubquery = 4 Other = 5 - class PythonType(enum.IntEnum): Array = 0 Bool = 1 @@ -51,7 +45,6 @@ class PythonType(enum.IntEnum): Object = 8 Str = 9 - class SqlType(enum.IntEnum): ANY = 0 ARRAY = 1 @@ -102,161 +95,111 @@ class SqlType(enum.IntEnum): VARBINARY = 46 VARCHAR = 47 - class DataTypeMap: - def __init__(self, arrow_type: DataType, python_type: PythonType, sql_type: SqlType) -> None: ... - + def __init__( + self, arrow_type: DataType, python_type: PythonType, sql_type: SqlType + ) -> None: ... @staticmethod def from_parquet_type_str(parquet_str_type: str) -> DataTypeMap: ... - @staticmethod def arrow(arrow_type: DataType) -> DataTypeMap: ... - @staticmethod def arrow_str(arrow_type_str: str) -> DataTypeMap: ... - @staticmethod def sql(sql_type: SqlType) -> DataTypeMap: ... - def friendly_arrow_type_name(self) -> str: ... - @property def arrow_type(self) -> DataType: ... - @arrow_type.setter def arrow_type(self, arrow_type: DataType): ... - @property def python_type(self) -> PythonType: ... - @python_type.setter def python_type(self, python_type: PythonType): ... - @property def sql_type(self) -> SqlType: ... - @sql_type.setter def sql_type(self, sql_type: SqlType): ... - class NullTreatment(enum.IntEnum): IGNORE_NULLS = 0 RESPECT_NULLS = 1 - class SqlSchema: - def __init__(self, schema_name: str) -> None: ... - def table_by_name(self, table_name: str) -> Optional[SqlTable]: ... - def add_table(self, table: SqlTable): ... - def drop_table(self, table_name: str): ... - @property def name(self) -> str: ... - @name.setter def name(self, name: str): ... - @property def tables(self) -> List[SqlTable]: ... - @tables.setter def tables(self, tables: List[SqlTable]): ... - @property def views(self) -> List[SqlView]: ... - @views.setter def views(self, views: List[SqlView]): ... - @property def functions(self) -> List[SqlFunction]: ... - @functions.setter def functions(self, functions: List[SqlFunction]): ... - class SqlTable: def __init__( self, table_name: str, columns: List[Tuple[str, DataTypeMap]], row_count: int, - filepaths: Optional[List[str]] = None - ) -> None: ... - + filepaths: Optional[List[str]] = None, + ) -> None: ... @property def name(self) -> str: ... - @name.setter def name(self, name: str): ... - @property def columns(self) -> List[Tuple[str, DataTypeMap]]: ... - @columns.setter def columns(self, columns: List[Tuple[str, DataTypeMap]]): ... - @property def primary_key(self) -> Optional[str]: ... - @primary_key.setter def primary_key(self, primary_key: Optional[str]): ... - @property def foreign_keys(self) -> List[str]: ... - @foreign_keys.setter def foreign_keys(self, foreign_keys: List[str]): ... - @property def indexes(self) -> List[str]: ... - @indexes.setter def indexes(self, indexes: List[str]): ... - @property def constraints(self) -> List[str]: ... - @constraints.setter def constraints(self, constraints: List[str]): ... - @property def statistics(self) -> SqlStatistics: ... - @statistics.setter def statistics(self, statistics: SqlStatistics): ... - @property def filepaths(self) -> Optional[List[str]]: ... - @filepaths.setter def filepaths(self, filepaths: Optional[List[str]]): ... - class SqlView: - @property def name(self) -> str: ... - @name.setter def name(self, name: str): ... - @property def definition(self) -> str: ... - @definition.setter def definition(self, definition: str): ... - class SqlStatistics: def __init__(self, row_count: float) -> None: ... - def getRowCount(self) -> float: ... - -class SqlFunction: - ... +class SqlFunction: ... diff --git a/python/datafusion/_internal/expr/__init__.pyi b/python/datafusion/_internal/expr/__init__.pyi index ef21c0ba6..ba7cc9101 100644 --- a/python/datafusion/_internal/expr/__init__.pyi +++ b/python/datafusion/_internal/expr/__init__.pyi @@ -20,7 +20,18 @@ from .column import Column as Column from .binary_expr import BinaryExpr as BinaryExpr from .literal import Literal as Literal from .aggregate_expr import AggregateFunction as AggregateFunction -from .bool_expr import Not as Not, IsNotNull as IsNotNull, IsNull as IsNull, IsTrue as IsTrue, IsFalse as IsFalse, IsUnknown as IsUnknown, IsNotTrue as IsNotTrue, IsNotFalse as IsNotFalse, IsNotUnknown as IsNotUnknown, Negative as Negative +from .bool_expr import ( + Not as Not, + IsNotNull as IsNotNull, + IsNull as IsNull, + IsTrue as IsTrue, + IsFalse as IsFalse, + IsUnknown as IsUnknown, + IsNotTrue as IsNotTrue, + IsNotFalse as IsNotFalse, + IsNotUnknown as IsNotUnknown, + Negative as Negative, +) from .like import Like as Like, ILike as ILike, SimilarTo as SimilarTo from .scalar_variable import ScalarVariable as ScalarVariable from .alias import Alias as Alias @@ -56,4 +67,8 @@ from .sort_expr import SortExpr as SortExpr from .subquery_alias import SubqueryAlias as SubqueryAlias from .drop_table import DropTable as DropTable from .repartition import Partitioning as Partitioning, Repartition as Repartition -from .window import WindowExpr as WindowExpr, WindowFrame as WindowFrame, WindowFrameBound as WindowFrameBound +from .window import ( + WindowExpr as WindowExpr, + WindowFrame as WindowFrame, + WindowFrameBound as WindowFrameBound, +) diff --git a/python/datafusion/_internal/expr/aggregate.pyi b/python/datafusion/_internal/expr/aggregate.pyi index a1bc6d989..c6fe8a0b3 100644 --- a/python/datafusion/_internal/expr/aggregate.pyi +++ b/python/datafusion/_internal/expr/aggregate.pyi @@ -20,18 +20,11 @@ from .. import LogicalPlan from ..common import DFSchema from .base import Expr - class Aggregate: def group_by_exprs(self) -> List[Expr]: ... - def aggregate_exprs(self) -> List[Expr]: ... - def agg_expressions(self) -> List[Expr]: ... - def agg_func_name(self, expr: Expr) -> str: ... - def aggregation_arguments(self, expr: Expr) -> List[Expr]: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/aggregate_expr.pyi b/python/datafusion/_internal/expr/aggregate_expr.pyi index 12207bcc8..b093e1b42 100644 --- a/python/datafusion/_internal/expr/aggregate_expr.pyi +++ b/python/datafusion/_internal/expr/aggregate_expr.pyi @@ -18,10 +18,7 @@ from typing import List from .base import Expr - class AggregateFunction: def aggregate_type(self) -> str: ... - def is_distinct(self) -> bool: ... - def args(self) -> List[Expr]: ... diff --git a/python/datafusion/_internal/expr/alias.pyi b/python/datafusion/_internal/expr/alias.pyi index 02acd669c..af9d4fb3d 100644 --- a/python/datafusion/_internal/expr/alias.pyi +++ b/python/datafusion/_internal/expr/alias.pyi @@ -19,5 +19,4 @@ from .base import Expr class Alias: def alias(self) -> str: ... - def expr(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/analyze.pyi b/python/datafusion/_internal/expr/analyze.pyi index 203afaab4..979edd2ac 100644 --- a/python/datafusion/_internal/expr/analyze.pyi +++ b/python/datafusion/_internal/expr/analyze.pyi @@ -19,10 +19,7 @@ from typing import List from .. import LogicalPlan from ..common import DFSchema - class Analyze: def verbose(self) -> bool: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/base.pyi b/python/datafusion/_internal/expr/base.pyi index f368ec74c..a91914a05 100644 --- a/python/datafusion/_internal/expr/base.pyi +++ b/python/datafusion/_internal/expr/base.pyi @@ -23,103 +23,56 @@ from .. import LogicalPlan from .window import WindowFrame from .sort_expr import SortExpr - class Expr: - def to_variant(self) -> Any: - ... - - def schema_name(self) -> str: - ... - - def canonical_name(self) -> str: - ... - - def variant_name(self) -> str: - ... - + def to_variant(self) -> Any: ... + def schema_name(self) -> str: ... + def canonical_name(self) -> str: ... + def variant_name(self) -> str: ... def __richcmp__(self, other: Expr, op: int) -> Expr: ... - def __add__(self, rhs: Expr) -> Expr: ... - def __sub__(self, rhs: Expr) -> Expr: ... - def __truediv__(self, rhs: Expr) -> Expr: ... - def __mul__(self, rhs: Expr) -> Expr: ... - def __mod__(self, rhs: Expr) -> Expr: ... - def __and__(self, rhs: Expr) -> Expr: ... - def __or__(self, rhs: Expr) -> Expr: ... - def __invert__(self) -> Expr: ... - def __getitem__(self, key: str) -> Expr: ... - @staticmethod def literal(value: Any) -> Expr: ... - @staticmethod def column(value: str) -> Expr: ... - def alias(self, name: str) -> Expr: ... - def sort(self, ascending: bool = True, nulls_first: bool = True) -> Expr: ... - def is_null(self) -> Expr: ... - def is_not_null(self) -> Expr: ... - def cast(self, to: pa.DataType) -> Expr: ... - def between(self, low: Expr, high: Expr, negated: bool = False) -> Expr: ... - def rex_type(self) -> RexType: ... - def types(self) -> DataTypeMap: ... - def python_value(self) -> Any: ... - def rex_call_operands(self) -> List[Expr]: ... - def rex_call_operator(self) -> str: ... - def column_name(self, plan: LogicalPlan) -> str: ... - def order_by(self, order_by: List[SortExpr]) -> ExprFuncBuilder: ... - def filter(self, filter: Expr) -> ExprFuncBuilder: ... - def distinct(self) -> ExprFuncBuilder: ... - def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: ... - - def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... - + def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... - def over( self, partition_by: Optional[List[Expr]] = None, window_frame: Optional[WindowFrame] = None, order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[NullTreatment] = None) -> Expr: - ... + null_treatment: Optional[NullTreatment] = None, + ) -> Expr: ... class ExprFuncBuilder: def order_by(self, order_by: List[SortExpr]) -> ExprFuncBuilder: ... - def filter(self, filter: Expr) -> ExprFuncBuilder: ... - def distinct(self) -> ExprFuncBuilder: ... - def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: ... - - def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... - + def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... - def build(self) -> Expr: ... - - diff --git a/python/datafusion/_internal/expr/between.pyi b/python/datafusion/_internal/expr/between.pyi index 2c79e698d..a693225c4 100644 --- a/python/datafusion/_internal/expr/between.pyi +++ b/python/datafusion/_internal/expr/between.pyi @@ -17,12 +17,8 @@ from .base import Expr - class Between: def expr(self) -> Expr: ... - def negated(self) -> bool: ... - def low(self) -> Expr: ... - def high(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/binary_expr.pyi b/python/datafusion/_internal/expr/binary_expr.pyi index bf0149b82..bb4f11e32 100644 --- a/python/datafusion/_internal/expr/binary_expr.pyi +++ b/python/datafusion/_internal/expr/binary_expr.pyi @@ -17,10 +17,7 @@ from .base import Expr - class BinaryExpr: def left(self) -> Expr: ... - def right(self) -> Expr: ... - def on(self) -> str: ... diff --git a/python/datafusion/_internal/expr/bool_expr.pyi b/python/datafusion/_internal/expr/bool_expr.pyi index 8eebf534d..3d0f1d846 100644 --- a/python/datafusion/_internal/expr/bool_expr.pyi +++ b/python/datafusion/_internal/expr/bool_expr.pyi @@ -17,7 +17,6 @@ from .base import Expr - class Not: def expr(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/case.pyi b/python/datafusion/_internal/expr/case.pyi index bf5ac1ccd..9c6cd8ef9 100644 --- a/python/datafusion/_internal/expr/case.pyi +++ b/python/datafusion/_internal/expr/case.pyi @@ -18,10 +18,7 @@ from typing import List, Optional, Tuple from .base import Expr - class Case: def expr(self) -> Optional[Expr]: ... - def when_then_expr(self) -> List[Tuple[Expr, Expr]]: ... - def else_expr(self) -> Optional[Expr]: ... diff --git a/python/datafusion/_internal/expr/cast.pyi b/python/datafusion/_internal/expr/cast.pyi index 75e372ad0..68d450d71 100644 --- a/python/datafusion/_internal/expr/cast.pyi +++ b/python/datafusion/_internal/expr/cast.pyi @@ -18,14 +18,10 @@ from .base import Expr from ..common import DataType - class Cast: - def expr(self) -> Expr: ... - + def expr(self) -> Expr: ... def data_type(self) -> DataType: ... - class TryCast: - def expr(self) -> Expr: ... - + def expr(self) -> Expr: ... def data_type(self) -> DataType: ... diff --git a/python/datafusion/_internal/expr/column.pyi b/python/datafusion/_internal/expr/column.pyi index 57be204b6..d7d0ec167 100644 --- a/python/datafusion/_internal/expr/column.pyi +++ b/python/datafusion/_internal/expr/column.pyi @@ -19,8 +19,5 @@ from typing import Optional class Column: def name(self) -> str: ... - def relation(self) -> Optional[str]: ... - def qualified_name(self) -> str: ... - diff --git a/python/datafusion/_internal/expr/conditional_expr.pyi b/python/datafusion/_internal/expr/conditional_expr.pyi index a55ae352d..81d09528b 100644 --- a/python/datafusion/_internal/expr/conditional_expr.pyi +++ b/python/datafusion/_internal/expr/conditional_expr.pyi @@ -17,10 +17,7 @@ from .base import Expr - class CaseBuilder: def when(self, when: Expr, then: Expr) -> CaseBuilder: ... - def otherwise(self, else_expr: Expr) -> Expr: ... - def end(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/create_memory_table.pyi b/python/datafusion/_internal/expr/create_memory_table.pyi index 10cadfe88..60bfa8c44 100644 --- a/python/datafusion/_internal/expr/create_memory_table.pyi +++ b/python/datafusion/_internal/expr/create_memory_table.pyi @@ -18,7 +18,6 @@ from typing import List from .. import LogicalPlan - class CreateMemoryTable: def name(self) -> str: ... def input(self) -> List[LogicalPlan]: ... diff --git a/python/datafusion/_internal/expr/create_view.pyi b/python/datafusion/_internal/expr/create_view.pyi index deec726a5..01304d831 100644 --- a/python/datafusion/_internal/expr/create_view.pyi +++ b/python/datafusion/_internal/expr/create_view.pyi @@ -18,7 +18,6 @@ from typing import List, Optional from .. import LogicalPlan - class CreateView: def name(self) -> str: ... def input(self) -> List[LogicalPlan]: ... diff --git a/python/datafusion/_internal/expr/distinct.pyi b/python/datafusion/_internal/expr/distinct.pyi index 747be4e2a..3041451dc 100644 --- a/python/datafusion/_internal/expr/distinct.pyi +++ b/python/datafusion/_internal/expr/distinct.pyi @@ -18,8 +18,6 @@ from typing import List from .. import LogicalPlan - class Distinct: def input(self) -> List[LogicalPlan]: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/drop_table.pyi b/python/datafusion/_internal/expr/drop_table.pyi index 94b084ab0..0df01b49c 100644 --- a/python/datafusion/_internal/expr/drop_table.pyi +++ b/python/datafusion/_internal/expr/drop_table.pyi @@ -18,12 +18,8 @@ from typing import List from .. import LogicalPlan - class DropTable: def name(self) -> str: ... - def input(self) -> List[LogicalPlan]: ... - def if_exists(self) -> bool: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/empty_relation.pyi b/python/datafusion/_internal/expr/empty_relation.pyi index 2b3902ad8..f062d167e 100644 --- a/python/datafusion/_internal/expr/empty_relation.pyi +++ b/python/datafusion/_internal/expr/empty_relation.pyi @@ -17,10 +17,7 @@ from ..common import DFSchema - class EmptyRelation: def produce_one_row(self) -> bool: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/exists.pyi b/python/datafusion/_internal/expr/exists.pyi index 75e8f3432..9395398d7 100644 --- a/python/datafusion/_internal/expr/exists.pyi +++ b/python/datafusion/_internal/expr/exists.pyi @@ -17,8 +17,6 @@ from .subquery import Subquery - class Exists: def subquery(self) -> Subquery: ... - def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/explain.pyi b/python/datafusion/_internal/expr/explain.pyi index 98ac70fcf..de1bbcb6e 100644 --- a/python/datafusion/_internal/expr/explain.pyi +++ b/python/datafusion/_internal/expr/explain.pyi @@ -19,15 +19,9 @@ from typing import List from .. import LogicalPlan from ..common import DFSchema - class Explain: def explain_string(self) -> List[str]: ... - def verbose(self) -> bool: ... - def plan(self) -> LogicalPlan: ... - def schema(self) -> DFSchema: ... - def logical_optimization_succceeded(self) -> bool: ... - diff --git a/python/datafusion/_internal/expr/filter.pyi b/python/datafusion/_internal/expr/filter.pyi index fc7b148a6..c9f9678e1 100644 --- a/python/datafusion/_internal/expr/filter.pyi +++ b/python/datafusion/_internal/expr/filter.pyi @@ -20,10 +20,7 @@ from .base import Expr from .. import LogicalPlan from ..common import DFSchema - class Filter: def predicate(self) -> Expr: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/grouping_set.pyi b/python/datafusion/_internal/expr/grouping_set.pyi index 4c2632e1c..2f46b09c9 100644 --- a/python/datafusion/_internal/expr/grouping_set.pyi +++ b/python/datafusion/_internal/expr/grouping_set.pyi @@ -15,5 +15,4 @@ # specific language governing permissions and limitations # under the License. -class GroupingSet: - ... +class GroupingSet: ... diff --git a/python/datafusion/_internal/expr/in_list.pyi b/python/datafusion/_internal/expr/in_list.pyi index 0469880db..9ef53b7ca 100644 --- a/python/datafusion/_internal/expr/in_list.pyi +++ b/python/datafusion/_internal/expr/in_list.pyi @@ -20,7 +20,5 @@ from .base import Expr class InList: def expr(self) -> Expr: ... - def list(self) -> List[Expr]: ... - def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/in_subquery.pyi b/python/datafusion/_internal/expr/in_subquery.pyi index 400bad460..97d7c10be 100644 --- a/python/datafusion/_internal/expr/in_subquery.pyi +++ b/python/datafusion/_internal/expr/in_subquery.pyi @@ -18,10 +18,7 @@ from .base import Expr from .subquery import Subquery - class InSubquery: def expr(self) -> Expr: ... - def subquery(self) -> Subquery: ... - def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/join.pyi b/python/datafusion/_internal/expr/join.pyi index f0c4692f8..5827dc7f6 100644 --- a/python/datafusion/_internal/expr/join.pyi +++ b/python/datafusion/_internal/expr/join.pyi @@ -20,30 +20,18 @@ from .. import LogicalPlan from .base import Expr from ..common import DFSchema - class JoinType: def is_outer(self) -> bool: ... -class JoinConstraint: - ... - +class JoinConstraint: ... class Join: def left(self) -> LogicalPlan: ... - def right(self) -> LogicalPlan: ... - def on(self) -> List[Tuple[Expr, Expr]]: ... - def filter(self) -> Optional[Expr]: ... - def join_type(self) -> JoinType: ... - def join_constraint(self) -> JoinConstraint: ... - def schema(self) -> DFSchema: ... - def null_equals_null(self) -> bool: ... - def __name__(self) -> str: ... - diff --git a/python/datafusion/_internal/expr/like.pyi b/python/datafusion/_internal/expr/like.pyi index 78f955551..d2e81eb7c 100644 --- a/python/datafusion/_internal/expr/like.pyi +++ b/python/datafusion/_internal/expr/like.pyi @@ -18,31 +18,20 @@ from typing import Optional from .base import Expr - class Like: def negated(self) -> bool: ... - def expr(self) -> Expr: ... - def pattern(self) -> Expr: ... - def escape_char(self) -> Optional[str]: ... - class ILike: def negated(self) -> bool: ... - def expr(self) -> Expr: ... - def pattern(self) -> Expr: ... - def escape_char(self) -> Optional[str]: ... class SimilarTo: def negated(self) -> bool: ... - def expr(self) -> Expr: ... - def pattern(self) -> Expr: ... - def escape_char(self) -> Optional[str]: ... diff --git a/python/datafusion/_internal/expr/limit.pyi b/python/datafusion/_internal/expr/limit.pyi index d8e111398..482e1e6a1 100644 --- a/python/datafusion/_internal/expr/limit.pyi +++ b/python/datafusion/_internal/expr/limit.pyi @@ -19,8 +19,6 @@ from typing import List from .. import LogicalPlan from ..common import DFSchema - class Limit: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/literal.pyi b/python/datafusion/_internal/expr/literal.pyi index ac767277b..478990bee 100644 --- a/python/datafusion/_internal/expr/literal.pyi +++ b/python/datafusion/_internal/expr/literal.pyi @@ -17,45 +17,24 @@ from typing import Optional, Tuple - class Literal: def data_type(self) -> str: ... - def value_f32(self) -> Optional[float]: ... - def value_f64(self) -> Optional[float]: ... - def value_decimal128(self) -> Tuple[Optional[int], int, int]: ... - def value_i8(self) -> Optional[int]: ... - def value_i16(self) -> Optional[int]: ... - def value_i32(self) -> Optional[int]: ... - def value_i64(self) -> Optional[int]: ... - def value_u8(self) -> Optional[int]: ... - def value_u16(self) -> Optional[int]: ... - def value_u32(self) -> Optional[int]: ... - def value_u64(self) -> Optional[int]: ... - def value_date32(self) -> Optional[int]: ... - def value_date64(self) -> Optional[int]: ... - def value_time64(self) -> Optional[int]: ... - def value_timestamp(self) -> Tuple[Optional[int], Optional[int]]: ... - def value_bool(self) -> Optional[bool]: ... - def value_string(self) -> Optional[str]: ... - def value_interval_day_time(self) -> Optional[Tuple[int, int]]: ... - def into_type(self) -> Literal: ... - diff --git a/python/datafusion/_internal/expr/placeholder.pyi b/python/datafusion/_internal/expr/placeholder.pyi index e62fa8128..368bb4692 100644 --- a/python/datafusion/_internal/expr/placeholder.pyi +++ b/python/datafusion/_internal/expr/placeholder.pyi @@ -17,7 +17,6 @@ from ..common import DataType - class Placeholder: def id(self) -> str: ... def data_type(self) -> DataType: ... diff --git a/python/datafusion/_internal/expr/projection.pyi b/python/datafusion/_internal/expr/projection.pyi index c71252d48..8964b7c7c 100644 --- a/python/datafusion/_internal/expr/projection.pyi +++ b/python/datafusion/_internal/expr/projection.pyi @@ -20,12 +20,8 @@ from .base import Expr from .. import LogicalPlan from ..common import DFSchema - class Projection: def projections(self) -> List[Expr]: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/repartition.pyi b/python/datafusion/_internal/expr/repartition.pyi index 5cbcc9f18..ccb148ee1 100644 --- a/python/datafusion/_internal/expr/repartition.pyi +++ b/python/datafusion/_internal/expr/repartition.pyi @@ -19,7 +19,6 @@ from typing import List from .. import LogicalPlan from .base import Expr - class Repartition: def input(self) -> List[LogicalPlan]: ... def partitioning_scheme(self) -> Partitioning: ... @@ -27,5 +26,4 @@ class Repartition: def distribute_columns(self) -> str: ... def __name__(self) -> str: ... -class Partitioning: - ... +class Partitioning: ... diff --git a/python/datafusion/_internal/expr/scalar_subquery.pyi b/python/datafusion/_internal/expr/scalar_subquery.pyi index 11ff0949f..c7dc6aa49 100644 --- a/python/datafusion/_internal/expr/scalar_subquery.pyi +++ b/python/datafusion/_internal/expr/scalar_subquery.pyi @@ -17,6 +17,5 @@ from .subquery import Subquery - class ScalarSubquery: def subquery(self) -> Subquery: ... diff --git a/python/datafusion/_internal/expr/scalar_variable.pyi b/python/datafusion/_internal/expr/scalar_variable.pyi index 490d31174..8cbf25873 100644 --- a/python/datafusion/_internal/expr/scalar_variable.pyi +++ b/python/datafusion/_internal/expr/scalar_variable.pyi @@ -18,10 +18,6 @@ from typing import List from ..common import DataType - class ScalarVariable: - def data_type(self) -> DataType: - ... - - def variables(self) -> List[str]: - ... + def data_type(self) -> DataType: ... + def variables(self) -> List[str]: ... diff --git a/python/datafusion/_internal/expr/sort.pyi b/python/datafusion/_internal/expr/sort.pyi index 437ffc2d0..a0fe8f532 100644 --- a/python/datafusion/_internal/expr/sort.pyi +++ b/python/datafusion/_internal/expr/sort.pyi @@ -21,12 +21,8 @@ from .. import LogicalPlan from ..common import DFSchema from .sort_expr import SortExpr - class Sort: def sort_exprs(self) -> List[SortExpr]: ... - def get_fetch_val(self) -> Optional[int]: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/sort_expr.pyi b/python/datafusion/_internal/expr/sort_expr.pyi index af5720aec..669fbafe3 100644 --- a/python/datafusion/_internal/expr/sort_expr.pyi +++ b/python/datafusion/_internal/expr/sort_expr.pyi @@ -17,7 +17,6 @@ from .base import Expr - class SortExpr: def __init__( self, @@ -25,9 +24,6 @@ class SortExpr: asc: bool, nulls_first: bool, ) -> None: ... - def expr(self) -> Expr: ... - def ascending(self) -> bool: ... - def nulls_first(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/subquery.pyi b/python/datafusion/_internal/expr/subquery.pyi index 07847e92e..4ef8e696c 100644 --- a/python/datafusion/_internal/expr/subquery.pyi +++ b/python/datafusion/_internal/expr/subquery.pyi @@ -19,8 +19,5 @@ from typing import List from .. import LogicalPlan class Subquery: - def input(self) -> List[LogicalPlan]: - ... - - def __name__(self) -> str: - ... + def input(self) -> List[LogicalPlan]: ... + def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/subquery_alias.pyi b/python/datafusion/_internal/expr/subquery_alias.pyi index 7de080931..5905f7d93 100644 --- a/python/datafusion/_internal/expr/subquery_alias.pyi +++ b/python/datafusion/_internal/expr/subquery_alias.pyi @@ -19,12 +19,8 @@ from typing import List from .. import LogicalPlan from ..common import DFSchema - class SubqueryAlias: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def alias(self) -> str: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/table_scan.pyi b/python/datafusion/_internal/expr/table_scan.pyi index 5f28aefe4..fadfb3ab0 100644 --- a/python/datafusion/_internal/expr/table_scan.pyi +++ b/python/datafusion/_internal/expr/table_scan.pyi @@ -19,17 +19,10 @@ from typing import List, Optional, Tuple from ..common import DFSchema from .base import Expr - class TableScan: def table_name(self) -> str: ... - def fqn(self) -> Tuple[Optional[str], Optional[str], str]: ... - def projection(self) -> List[Tuple[int, str]]: ... - def schema(self) -> DFSchema: ... - def filters(self) -> List[Expr]: ... - def fetch(self) -> Optional[int]: ... - diff --git a/python/datafusion/_internal/expr/union.pyi b/python/datafusion/_internal/expr/union.pyi index 52e67fee3..4d4c63da3 100644 --- a/python/datafusion/_internal/expr/union.pyi +++ b/python/datafusion/_internal/expr/union.pyi @@ -19,10 +19,7 @@ from typing import List from .. import LogicalPlan from ..common import DFSchema - class Union: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/unnest.pyi b/python/datafusion/_internal/expr/unnest.pyi index b2ff582c2..a6d7d4af5 100644 --- a/python/datafusion/_internal/expr/unnest.pyi +++ b/python/datafusion/_internal/expr/unnest.pyi @@ -19,10 +19,7 @@ from typing import List from .. import LogicalPlan from ..common import DFSchema - class Unnest: def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/unnest_expr.pyi b/python/datafusion/_internal/expr/unnest_expr.pyi index 7052f9ae7..ea787a00d 100644 --- a/python/datafusion/_internal/expr/unnest_expr.pyi +++ b/python/datafusion/_internal/expr/unnest_expr.pyi @@ -17,8 +17,6 @@ from .base import Expr - class UnnestExpr: def expr(self) -> Expr: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/window.pyi b/python/datafusion/_internal/expr/window.pyi index e839c87f6..f47438d0f 100644 --- a/python/datafusion/_internal/expr/window.pyi +++ b/python/datafusion/_internal/expr/window.pyi @@ -20,46 +20,29 @@ from ..common import DFSchema from .base import Expr from .sort_expr import SortExpr - class WindowExpr: def schema(self) -> DFSchema: ... - def get_window_expr(self) -> List[Expr]: ... - def get_sort_exprs(self, expr: Expr) -> List[SortExpr]: ... - def get_partition_exprs(self, expr: Expr) -> List[Expr]: ... - def get_args(self, expr: Expr) -> List[Expr]: ... - def window_func_name(self, expr: Expr) -> str: ... - def get_frame(self, expr: Expr) -> Optional[WindowFrame]: ... - class WindowFrame: def __init__( self, unit: str, start_bound: Optional[Any], end_bound: Optional[Any], - ) -> None: ... - + ) -> None: ... def get_frame_units(self) -> str: ... - def get_lower_bound(self) -> WindowFrameBound: ... - def get_upper_bound(self) -> WindowFrameBound: ... - class WindowFrameBound: def is_current_row(self) -> bool: ... - def is_preceding(self) -> bool: ... - def is_following(self) -> bool: ... - def get_offset(self) -> Optional[int]: ... - def is_unbounded(self) -> bool: ... - diff --git a/python/datafusion/_internal/functions/__init__.pyi b/python/datafusion/_internal/functions/__init__.pyi index 3807d715d..9d10bceac 100644 --- a/python/datafusion/_internal/functions/__init__.pyi +++ b/python/datafusion/_internal/functions/__init__.pyi @@ -20,306 +20,186 @@ from ..expr import Expr, SortExpr, CaseBuilder, WindowFrame from .. import SessionContext def in_list(expr: Expr, value: List[Expr], negated: bool) -> Expr: ... - def make_array(exprs: List[Expr]) -> Expr: ... - def array_concat(exprs: List[Expr]) -> Expr: ... - def array_cat(exprs: List[Expr]) -> Expr: ... - def array_position(array: Expr, element: Expr, index: Optional[int] = None) -> Expr: ... - -def array_slice(array: Expr, begin: Expr, end: Expr, stride: Optional[Expr] = None) -> Expr: ... - +def array_slice( + array: Expr, begin: Expr, end: Expr, stride: Optional[Expr] = None +) -> Expr: ... def digest(value: Expr, method: Expr) -> Expr: ... - def concat(args: List[Expr]) -> Expr: ... - def concat_ws(sep: str, args: List[Expr]) -> Expr: ... - def regexp_like(values: Expr, regex: Expr, flags: Optional[Expr] = None) -> Expr: ... - def regexp_match(values: Expr, regex: Expr, flags: Optional[Expr] = None) -> Expr: ... - -def regexp_replace(string: Expr, pattern: Expr, replacement: Expr, flags: Optional[Expr] = None) -> Expr: ... - +def regexp_replace( + string: Expr, pattern: Expr, replacement: Expr, flags: Optional[Expr] = None +) -> Expr: ... def order_by(expr: Expr, asc: bool, nulls_first: bool) -> SortExpr: ... - def alias(expr: Expr, name: str) -> Expr: ... - def col(name: str) -> Expr: ... - def case(expr: Expr) -> CaseBuilder: ... - def when(when: Expr, then: Expr) -> CaseBuilder: ... - -def window(name: str, args: List[Expr], partition_by: Optional[List[Expr]], order_by: Optional[List[SortExpr]], window_frame: Optional[WindowFrame], ctx: Optional[SessionContext]) -> Expr: ... - +def window( + name: str, + args: List[Expr], + partition_by: Optional[List[Expr]], + order_by: Optional[List[SortExpr]], + window_frame: Optional[WindowFrame], + ctx: Optional[SessionContext], +) -> Expr: ... def abs(num: Expr) -> Expr: ... - def acos(num: Expr) -> Expr: ... - def acosh(num: Expr) -> Expr: ... - def ascii(num: Expr) -> Expr: ... - def asin(num: Expr) -> Expr: ... - def asinh(num: Expr) -> Expr: ... - def atan(num: Expr) -> Expr: ... - def atanh(num: Expr) -> Expr: ... - def atan2(y: Expr, x: Expr) -> Expr: ... - def bit_length(arg: Expr) -> Expr: ... - def btrim(*args: Expr) -> Expr: ... - def cbrt(num: Expr) -> Expr: ... - def ceil(num: Expr) -> Expr: ... - def character_length(string: Expr) -> Expr: ... - def length(string: Expr) -> Expr: ... - def char_length(string: Expr) -> Expr: ... - def chr(arg: Expr) -> Expr: ... - def coalesce(*args: Expr) -> Expr: ... - def cos(num: Expr) -> Expr: ... - def cosh(num: Expr) -> Expr: ... - def cot(num: Expr) -> Expr: ... - def degrees(num: Expr) -> Expr: ... - def decode(input: Expr, encoding: Expr) -> Expr: ... - def encode(input: Expr, encoding: Expr) -> Expr: ... - def ends_with(string: Expr, suffix: Expr) -> Expr: ... - def exp(num: Expr) -> Expr: ... - def factorial(num: Expr) -> Expr: ... - def floor(num: Expr) -> Expr: ... - def gcd(x: Expr, y: Expr) -> Expr: ... - def initcap(string: Expr) -> Expr: ... - def isnan(num: Expr) -> Expr: ... - def iszero(num: Expr) -> Expr: ... - def levenshtein(string1: Expr, string2: Expr) -> Expr: ... - def lcm(x: Expr, y: Expr) -> Expr: ... - def left(string: Expr, n: Expr) -> Expr: ... - def ln(num: Expr) -> Expr: ... - def log(base: Expr, num: Expr) -> Expr: ... - def log10(num: Expr) -> Expr: ... - def log2(num: Expr) -> Expr: ... - def lower(arg1: Expr) -> Expr: ... - def lpad(*args: Expr) -> Expr: ... - def ltrim(*args: Expr) -> Expr: ... - def md5(input_arg: Expr) -> Expr: ... - def nanvl(x: Expr, y: Expr) -> Expr: ... - def nvl(x: Expr, y: Expr) -> Expr: ... - def nullif(arg_1: Expr, arg_2: Expr) -> Expr: ... - def octet_length(args: Expr) -> Expr: ... - def overlay(*args: Expr) -> Expr: ... - def pi() -> Expr: ... - def power(base: Expr, exponent: Expr) -> Expr: ... - def radians(num: Expr) -> Expr: ... - def repeat(string: Expr, n: Expr) -> Expr: ... - def replace(string: Expr, from_: Expr, to: Expr) -> Expr: ... - def reverse(string: Expr) -> Expr: ... - def right(string: Expr, n: Expr) -> Expr: ... - def round(*args: Expr) -> Expr: ... - def rpad(*args: Expr) -> Expr: ... - def rtrim(*args: Expr) -> Expr: ... - def sha224(input_arg1: Expr) -> Expr: ... - def sha256(input_arg1: Expr) -> Expr: ... - def sha384(input_arg1: Expr) -> Expr: ... - def sha512(input_arg1: Expr) -> Expr: ... - def signum(num: Expr) -> Expr: ... - def sin(num: Expr) -> Expr: ... - def sinh(num: Expr) -> Expr: ... - def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr: ... - def sqrt(num: Expr) -> Expr: ... - def starts_with(string: Expr, prefix: Expr) -> Expr: ... - def strpos(string: Expr, substring: Expr) -> Expr: ... - def substr(string: Expr, position: Expr) -> Expr: ... - def substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr: ... - def substring(string: Expr, position: Expr, length: Expr) -> Expr: ... - def find_in_set(string: Expr, string_list: Expr) -> Expr: ... - def tan(num: Expr) -> Expr: ... - def tanh(num: Expr) -> Expr: ... - def to_hex(arg1: Expr) -> Expr: ... - def now() -> Expr: ... - def to_timestamp(*args: Expr) -> Expr: ... - def to_timestamp_millis(*args: Expr) -> Expr: ... - def to_timestamp_micros(*args: Expr) -> Expr: ... - def to_timestamp_seconds(*args: Expr) -> Expr: ... - def to_unixtime(*args: Expr) -> Expr: ... - def current_date() -> Expr: ... - def current_time() -> Expr: ... - def date_part(part: Expr, date: Expr) -> Expr: ... - def date_trunc(part: Expr, date: Expr) -> Expr: ... - def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr: ... - def make_date(year: Expr, month: Expr, day: Expr) -> Expr: ... - def translate(string: Expr, from_: Expr, to: Expr) -> Expr: ... - def trim(*args: Expr) -> Expr: ... - def trunc(*args: Expr) -> Expr: ... - def upper(arg1: Expr) -> Expr: ... - def uuid() -> Expr: ... - def struct(*args: Expr) -> Expr: ... - def named_struct(*args: Expr) -> Expr: ... - def from_unixtime(unixtime: Expr) -> Expr: ... - def arrow_typeof(arg_1: Expr) -> Expr: ... - def arrow_cast(arg_1: Expr, datatype: Expr) -> Expr: ... - def random() -> Expr: ... - def array_append(array: Expr, element: Expr) -> Expr: ... - def array_to_string(array: Expr, delimiter: Expr) -> Expr: ... - def array_dims(array: Expr) -> Expr: ... - def array_distinct(array: Expr) -> Expr: ... - def array_element(array: Expr, element: Expr) -> Expr: ... - def array_empty(array: Expr) -> Expr: ... - def array_length(array: Expr) -> Expr: ... - def array_has(first_array: Expr, second_array: Expr) -> Expr: ... - def array_has_all(first_array: Expr, second_array: Expr) -> Expr: ... - def array_has_any(first_array: Expr, second_array: Expr) -> Expr: ... - def array_positions(array: Expr, element: Expr) -> Expr: ... - def array_ndims(array: Expr) -> Expr: ... - def array_prepend(element: Expr, array: Expr) -> Expr: ... - def array_pop_back(array: Expr) -> Expr: ... - def array_pop_front(array: Expr) -> Expr: ... - def array_remove(array: Expr, element: Expr) -> Expr: ... - def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: ... - def array_remove_all(array: Expr, element: Expr) -> Expr: ... - def array_repeat(element: Expr, count: Expr) -> Expr: ... - def array_replace(array: Expr, from_: Expr, to: Expr) -> Expr: ... - def array_replace_n(array: Expr, from_: Expr, to: Expr, mx: Expr) -> Expr: ... - def array_replace_all(array: Expr, from_: Expr, to: Expr) -> Expr: ... - def array_sort(array: Expr, desc: Expr, null_first: Expr) -> Expr: ... - def array_intersect(first_array: Expr, second_array: Expr) -> Expr: ... - def array_union(array1: Expr, array2: Expr) -> Expr: ... - def array_except(first_array: Expr, second_array: Expr) -> Expr: ... - def array_resize(array: Expr, size: Expr, value: Expr) -> Expr: ... - def cardinality(array: Expr) -> Expr: ... - def flatten(array: Expr) -> Expr: ... - def range(start: Expr, stop: Expr, step: Expr) -> Expr: ... class AggregateFunction(Protocol): - def __call__(self, exp: Expr, *, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: - ... + def __call__( + self, + exp: Expr, + *, + distinct: Optional[bool] = None, + filter: Optional[Expr] = None, + order_by: Optional[List[SortExpr]] = None, + null_treatment: Optional[int] = None, + ) -> Expr: ... class AggregateFunctionYX(Protocol): - def __call__(self, y: Expr, x: Expr, *, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: - ... + def __call__( + self, + y: Expr, + x: Expr, + *, + distinct: Optional[bool] = None, + filter: Optional[Expr] = None, + order_by: Optional[List[SortExpr]] = None, + null_treatment: Optional[int] = None, + ) -> Expr: ... array_agg: AggregateFunction max: AggregateFunction @@ -352,8 +232,12 @@ var_pop: AggregateFunction approx_distinct: AggregateFunction approx_median: AggregateFunction -def approx_percentile_cont(expression: Expr, percentile: float, num_centroids: Optional[int] = None, filter: Optional[Expr] = None) -> Expr: ... - +def approx_percentile_cont( + expression: Expr, + percentile: float, + num_centroids: Optional[int] = None, + filter: Optional[Expr] = None, +) -> Expr: ... def approx_percentile_cont_with_weight( expression: Expr, weight: Expr, @@ -363,23 +247,50 @@ def approx_percentile_cont_with_weight( last_value: AggregateFunction -def first_value(expr: Expr, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: ... - -def nth_value(expr: Expr, n: int, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: - ... - -def string_agg(expr: Expr, delimiter: str, distinct: Optional[bool] = None, filter: Optional[Expr] = None, order_by: Optional[List[SortExpr]] = None, null_treatment: Optional[int] = None) -> Expr: - ... - -def lead(arg: Expr, shift_offset: int, default_value: Optional[Any] = None, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: - ... - -def lag(arg: Expr, shift_offset: int, default_value: Optional[Any] = None, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: - ... +def first_value( + expr: Expr, + distinct: Optional[bool] = None, + filter: Optional[Expr] = None, + order_by: Optional[List[SortExpr]] = None, + null_treatment: Optional[int] = None, +) -> Expr: ... +def nth_value( + expr: Expr, + n: int, + distinct: Optional[bool] = None, + filter: Optional[Expr] = None, + order_by: Optional[List[SortExpr]] = None, + null_treatment: Optional[int] = None, +) -> Expr: ... +def string_agg( + expr: Expr, + delimiter: str, + distinct: Optional[bool] = None, + filter: Optional[Expr] = None, + order_by: Optional[List[SortExpr]] = None, + null_treatment: Optional[int] = None, +) -> Expr: ... +def lead( + arg: Expr, + shift_offset: int, + default_value: Optional[Any] = None, + partition_by: Optional[List[Expr]] = None, + order_by: Optional[List[SortExpr]] = None, +) -> Expr: ... +def lag( + arg: Expr, + shift_offset: int, + default_value: Optional[Any] = None, + partition_by: Optional[List[Expr]] = None, + order_by: Optional[List[SortExpr]] = None, +) -> Expr: ... class WindowFunction(Protocol): - def __call__(self, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: - ... + def __call__( + self, + partition_by: Optional[List[Expr]] = None, + order_by: Optional[List[SortExpr]] = None, + ) -> Expr: ... row_number: WindowFunction rank: WindowFunction @@ -387,6 +298,8 @@ dense_rank: WindowFunction percent_rank: WindowFunction cume_dist: WindowFunction -def ntile(arg: Expr, partition_by: Optional[List[Expr]] = None, order_by: Optional[List[SortExpr]] = None) -> Expr: - ... - +def ntile( + arg: Expr, + partition_by: Optional[List[Expr]] = None, + order_by: Optional[List[SortExpr]] = None, +) -> Expr: ... diff --git a/python/datafusion/_internal/object_store/__init__.pyi b/python/datafusion/_internal/object_store/__init__.pyi index fa7cbc21d..6e82564c7 100644 --- a/python/datafusion/_internal/object_store/__init__.pyi +++ b/python/datafusion/_internal/object_store/__init__.pyi @@ -17,7 +17,6 @@ from typing import List, Optional, Tuple - class LocalFileSystem: def __init__(self, prefix: Optional[str] = None) -> None: ... @@ -33,15 +32,15 @@ class MicrosoftAzure: tenant_id: Optional[str] = None, sas_query_pairs: Optional[List[Tuple[str, str]]] = None, use_emulator: Optional[bool] = None, - allow_http: Optional[bool] = None - ) -> None: ... + allow_http: Optional[bool] = None, + ) -> None: ... class GoogleCloud: def __init__( self, bucket_name: str, service_account_path: Optional[str] = None, - ) -> None: ... + ) -> None: ... class AmazonS3: def __init__( @@ -53,8 +52,7 @@ class AmazonS3: endpoint: Optional[str] = None, allow_http: bool = False, imdsv1_fallback: bool = False, - ) -> None: ... + ) -> None: ... class Http: def __init__(self, url: str) -> None: ... - diff --git a/python/datafusion/_internal/substrait/__init__.pyi b/python/datafusion/_internal/substrait/__init__.pyi index 37b0e7398..63aa717b9 100644 --- a/python/datafusion/_internal/substrait/__init__.pyi +++ b/python/datafusion/_internal/substrait/__init__.pyi @@ -17,38 +17,25 @@ from .. import SessionContext, LogicalPlan - class Plan: - def encode(self) -> bytes: - ... + def encode(self) -> bytes: ... class Serde: @staticmethod - def serialize(sql: str, ctx: SessionContext, path: str): - ... - + def serialize(sql: str, ctx: SessionContext, path: str): ... @staticmethod - def serialize_to_plan(sql: str, ctx: SessionContext) -> Plan: - ... - + def serialize_to_plan(sql: str, ctx: SessionContext) -> Plan: ... @staticmethod - def serialize_bytes(sql: str, ctx: SessionContext) -> bytes: - ... - + def serialize_bytes(sql: str, ctx: SessionContext) -> bytes: ... @staticmethod - def deserialize(path: str) -> Plan: - ... - + def deserialize(path: str) -> Plan: ... @staticmethod - def deserialize_bytes(proto_bytes: bytes) -> Plan: - ... + def deserialize_bytes(proto_bytes: bytes) -> Plan: ... class Producer: @staticmethod - def to_substrait_plan(plan: LogicalPlan, ctx: SessionContext) -> Plan: - ... + def to_substrait_plan(plan: LogicalPlan, ctx: SessionContext) -> Plan: ... class Consumer: @staticmethod - def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: - ... + def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: ... diff --git a/python/datafusion/context.py b/python/datafusion/context.py index b899e9d67..e84c9e3bc 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -782,7 +782,9 @@ def register_parquet( file_extension, skip_metadata, schema, - [[expr.raw_sort for expr in exprs] for exprs in file_sort_order] if file_sort_order is not None else None, + [[expr.raw_sort for expr in exprs] for exprs in file_sort_order] + if file_sort_order is not None + else None, ) def register_csv( diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 017a51829..0c09efc3d 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -452,8 +452,8 @@ def join( raise ValueError( "`left_on` or `right_on` should not provided with `on`" ) - left_on = on # type: ignore - right_on = on # type: ignore + left_on = on # type: ignore + right_on = on # type: ignore elif left_on is not None or right_on is not None: if left_on is None or right_on is None: raise ValueError("`left_on` and `right_on` should both be provided.") @@ -466,7 +466,7 @@ def join( if isinstance(right_on, str): right_on = [right_on] - return DataFrame(self.df.join(right.df, how, left_on, right_on)) # type: ignore + return DataFrame(self.df.join(right.df, how, left_on, right_on)) # type: ignore def join_on( self, @@ -726,7 +726,9 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram A DataFrame with the columns expanded. """ columns_inner = [c for c in columns] - return DataFrame(self.df.unnest_columns(columns_inner, preserve_nulls=preserve_nulls)) + return DataFrame( + self.df.unnest_columns(columns_inner, preserve_nulls=preserve_nulls) + ) def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any: """Export an Arrow PyCapsule Stream. diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 6b8163aff..e2ec62d93 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -306,23 +306,23 @@ def __getitem__(self, key: str | int) -> Expr: ) return Expr(self.expr.__getitem__(key)) - def __eq__(self, rhs: Any) -> Expr: # type: ignore + def __eq__(self, rhs: Any) -> Expr: # type: ignore """Equal to. Accepts either an expression or any valid PyArrow scalar literal value. """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__eq__(rhs.expr)) # type: ignore + return Expr(self.expr.__eq__(rhs.expr)) # type: ignore - def __ne__(self, rhs: Any) -> Expr: # type: ignore + def __ne__(self, rhs: Any) -> Expr: # type: ignore """Not equal to. Accepts either an expression or any valid PyArrow scalar literal value. """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__ne__(rhs.expr)) # type: ignore + return Expr(self.expr.__ne__(rhs.expr)) # type: ignore def __ge__(self, rhs: Any) -> Expr: """Greater than or equal to. @@ -331,7 +331,7 @@ def __ge__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__ge__(rhs.expr)) # type: ignore + return Expr(self.expr.__ge__(rhs.expr)) # type: ignore def __gt__(self, rhs: Any) -> Expr: """Greater than. @@ -340,7 +340,7 @@ def __gt__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__gt__(rhs.expr)) # type: ignore + return Expr(self.expr.__gt__(rhs.expr)) # type: ignore def __le__(self, rhs: Any) -> Expr: """Less than or equal to. @@ -349,7 +349,7 @@ def __le__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__le__(rhs.expr)) # type: ignore + return Expr(self.expr.__le__(rhs.expr)) # type: ignore def __lt__(self, rhs: Any) -> Expr: """Less than. @@ -358,7 +358,7 @@ def __lt__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__lt__(rhs.expr)) # type: ignore + return Expr(self.expr.__lt__(rhs.expr)) # type: ignore __radd__ = __add__ __rand__ = __and__ @@ -584,7 +584,9 @@ def over(self, window: Window) -> Expr: window: Window definition """ partition_by_raw = expr_list_to_raw_expr_list(window._partition_by) - order_by_raw = sort_list_to_raw_sort_list(window._order_by) if window._order_by else None + order_by_raw = ( + sort_list_to_raw_sort_list(window._order_by) if window._order_by else None + ) window_frame_raw = ( window._window_frame.window_frame if window._window_frame is not None diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 50d8c6a18..f8a7418af 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -430,10 +430,21 @@ def window( """ args_inner = [a.expr for a in args] partition_by_inner = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) window_frame_inner = window_frame.window_frame if window_frame is not None else None ctx_inner = ctx.ctx if ctx is not None else None - return Expr(f.window(name, args_inner, partition_by_inner, order_by_raw, window_frame_inner, ctx_inner)) + return Expr( + f.window( + name, + args_inner, + partition_by_inner, + order_by_raw, + window_frame_inner, + ctx_inner, + ) + ) # scalar functions @@ -779,7 +790,9 @@ def regexp_replace( flags_inner = flags.expr else: flags_inner = None - return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags_inner)) + return Expr( + f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags_inner) + ) def repeat(string: Expr, n: Expr) -> Expr: @@ -1710,7 +1723,9 @@ def array_agg( filter: If provided, only compute against rows for which the filter is True order_by: Order the resultant array values """ - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2208,7 +2223,9 @@ def first_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2240,7 +2257,9 @@ def last_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2274,7 +2293,9 @@ def nth_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2415,7 +2436,9 @@ def lead( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.lead( @@ -2467,7 +2490,9 @@ def lag( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.lag( @@ -2506,7 +2531,9 @@ def row_number( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.row_number( @@ -2547,7 +2574,9 @@ def rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.rank( @@ -2583,7 +2612,9 @@ def dense_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.dense_rank( @@ -2620,7 +2651,9 @@ def percent_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.percent_rank( @@ -2657,7 +2690,9 @@ def cume_dist( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.cume_dist( @@ -2698,7 +2733,9 @@ def ntile( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) return Expr( f.ntile( @@ -2730,7 +2767,9 @@ def string_agg( filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate """ - order_by_raw = sort_list_to_raw_sort_list(order_by) if order_by is not None else None + order_by_raw = ( + sort_list_to_raw_sort_list(order_by) if order_by is not None else None + ) filter_raw = filter.expr if filter is not None else None return Expr( diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 7e4dd68f1..566676e4e 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -270,12 +270,12 @@ def sum_bias_10() -> Summarize: """ # noqa W505 if not callable(accum): raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): # type: ignore + if not isinstance(accum.__call__(), Accumulator): # type: ignore raise TypeError( "Accumulator must implement the abstract base class Accumulator" ) if name is None: - name = accum.__call__().__class__.__qualname__.lower() # type: ignore + name = accum.__call__().__class__.__qualname__.lower() # type: ignore assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] @@ -533,12 +533,12 @@ def bias_10() -> BiasedNumbers: """ # noqa W505 if not callable(func): raise TypeError("`func` must be callable.") - if not isinstance(func.__call__(), WindowEvaluator): # type: ignore + if not isinstance(func.__call__(), WindowEvaluator): # type: ignore raise TypeError( "`func` must implement the abstract base class WindowEvaluator" ) if name is None: - name = func.__call__().__class__.__qualname__.lower() # type: ignore + name = func.__call__().__class__.__qualname__.lower() # type: ignore assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] From 3250848e67f30347d86f91eaa6e34b6f64e149ac Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 12 Jan 2025 16:00:09 +0800 Subject: [PATCH 5/9] update catalog --- python/datafusion/catalog.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 5a38c5727..c0324310e 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -66,9 +66,10 @@ def __init__(self, table: df_internal.Table) -> None: """This constructor is not typically called by the end user.""" self.table = table + @property def schema(self) -> pyarrow.Schema: """Returns the schema associated with this table.""" - return self.table.schema() + return self.table.schema @property def kind(self) -> str: From b165ef340378eac882db541ebfa6e900ae931c29 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 13 Feb 2025 20:07:31 +0800 Subject: [PATCH 6/9] revert type annotation --- python/datafusion/__init__.py | 2 +- python/datafusion/_internal/__init__.pyi | 353 ------------------ python/datafusion/_internal/common.pyi | 205 ---------- python/datafusion/_internal/expr/__init__.pyi | 74 ---- .../datafusion/_internal/expr/aggregate.pyi | 30 -- .../_internal/expr/aggregate_expr.pyi | 24 -- python/datafusion/_internal/expr/alias.pyi | 22 -- python/datafusion/_internal/expr/analyze.pyi | 25 -- python/datafusion/_internal/expr/base.pyi | 78 ---- python/datafusion/_internal/expr/between.pyi | 24 -- .../datafusion/_internal/expr/binary_expr.pyi | 23 -- .../datafusion/_internal/expr/bool_expr.pyi | 48 --- python/datafusion/_internal/expr/case.pyi | 24 -- python/datafusion/_internal/expr/cast.pyi | 27 -- python/datafusion/_internal/expr/column.pyi | 23 -- .../_internal/expr/conditional_expr.pyi | 23 -- .../_internal/expr/create_memory_table.pyi | 26 -- .../datafusion/_internal/expr/create_view.pyi | 26 -- python/datafusion/_internal/expr/distinct.pyi | 23 -- .../datafusion/_internal/expr/drop_table.pyi | 25 -- .../_internal/expr/empty_relation.pyi | 23 -- python/datafusion/_internal/expr/exists.pyi | 22 -- python/datafusion/_internal/expr/explain.pyi | 27 -- .../datafusion/_internal/expr/extension.pyi | 19 - python/datafusion/_internal/expr/filter.pyi | 26 -- .../_internal/expr/grouping_set.pyi | 18 - python/datafusion/_internal/expr/in_list.pyi | 24 -- .../datafusion/_internal/expr/in_subquery.pyi | 24 -- python/datafusion/_internal/expr/join.pyi | 37 -- python/datafusion/_internal/expr/like.pyi | 37 -- python/datafusion/_internal/expr/limit.pyi | 24 -- python/datafusion/_internal/expr/literal.pyi | 40 -- .../datafusion/_internal/expr/placeholder.pyi | 22 -- .../datafusion/_internal/expr/projection.pyi | 27 -- .../datafusion/_internal/expr/repartition.pyi | 29 -- .../_internal/expr/scalar_subquery.pyi | 21 -- .../_internal/expr/scalar_variable.pyi | 23 -- python/datafusion/_internal/expr/sort.pyi | 28 -- .../datafusion/_internal/expr/sort_expr.pyi | 29 -- python/datafusion/_internal/expr/subquery.pyi | 23 -- .../_internal/expr/subquery_alias.pyi | 26 -- .../datafusion/_internal/expr/table_scan.pyi | 28 -- python/datafusion/_internal/expr/union.pyi | 25 -- python/datafusion/_internal/expr/unnest.pyi | 25 -- .../datafusion/_internal/expr/unnest_expr.pyi | 22 -- python/datafusion/_internal/expr/window.pyi | 48 --- .../_internal/functions/__init__.pyi | 305 --------------- .../_internal/object_store/__init__.pyi | 58 --- .../_internal/substrait/__init__.pyi | 41 -- python/datafusion/context.py | 22 +- python/datafusion/dataframe.py | 22 +- python/datafusion/expr.py | 26 +- python/datafusion/functions.py | 158 +++----- python/datafusion/udf.py | 9 +- python/tests/test_functions.py | 25 +- 55 files changed, 121 insertions(+), 2347 deletions(-) delete mode 100644 python/datafusion/_internal/__init__.pyi delete mode 100644 python/datafusion/_internal/common.pyi delete mode 100644 python/datafusion/_internal/expr/__init__.pyi delete mode 100644 python/datafusion/_internal/expr/aggregate.pyi delete mode 100644 python/datafusion/_internal/expr/aggregate_expr.pyi delete mode 100644 python/datafusion/_internal/expr/alias.pyi delete mode 100644 python/datafusion/_internal/expr/analyze.pyi delete mode 100644 python/datafusion/_internal/expr/base.pyi delete mode 100644 python/datafusion/_internal/expr/between.pyi delete mode 100644 python/datafusion/_internal/expr/binary_expr.pyi delete mode 100644 python/datafusion/_internal/expr/bool_expr.pyi delete mode 100644 python/datafusion/_internal/expr/case.pyi delete mode 100644 python/datafusion/_internal/expr/cast.pyi delete mode 100644 python/datafusion/_internal/expr/column.pyi delete mode 100644 python/datafusion/_internal/expr/conditional_expr.pyi delete mode 100644 python/datafusion/_internal/expr/create_memory_table.pyi delete mode 100644 python/datafusion/_internal/expr/create_view.pyi delete mode 100644 python/datafusion/_internal/expr/distinct.pyi delete mode 100644 python/datafusion/_internal/expr/drop_table.pyi delete mode 100644 python/datafusion/_internal/expr/empty_relation.pyi delete mode 100644 python/datafusion/_internal/expr/exists.pyi delete mode 100644 python/datafusion/_internal/expr/explain.pyi delete mode 100644 python/datafusion/_internal/expr/extension.pyi delete mode 100644 python/datafusion/_internal/expr/filter.pyi delete mode 100644 python/datafusion/_internal/expr/grouping_set.pyi delete mode 100644 python/datafusion/_internal/expr/in_list.pyi delete mode 100644 python/datafusion/_internal/expr/in_subquery.pyi delete mode 100644 python/datafusion/_internal/expr/join.pyi delete mode 100644 python/datafusion/_internal/expr/like.pyi delete mode 100644 python/datafusion/_internal/expr/limit.pyi delete mode 100644 python/datafusion/_internal/expr/literal.pyi delete mode 100644 python/datafusion/_internal/expr/placeholder.pyi delete mode 100644 python/datafusion/_internal/expr/projection.pyi delete mode 100644 python/datafusion/_internal/expr/repartition.pyi delete mode 100644 python/datafusion/_internal/expr/scalar_subquery.pyi delete mode 100644 python/datafusion/_internal/expr/scalar_variable.pyi delete mode 100644 python/datafusion/_internal/expr/sort.pyi delete mode 100644 python/datafusion/_internal/expr/sort_expr.pyi delete mode 100644 python/datafusion/_internal/expr/subquery.pyi delete mode 100644 python/datafusion/_internal/expr/subquery_alias.pyi delete mode 100644 python/datafusion/_internal/expr/table_scan.pyi delete mode 100644 python/datafusion/_internal/expr/union.pyi delete mode 100644 python/datafusion/_internal/expr/unnest.pyi delete mode 100644 python/datafusion/_internal/expr/unnest_expr.pyi delete mode 100644 python/datafusion/_internal/expr/window.pyi delete mode 100644 python/datafusion/_internal/functions/__init__.pyi delete mode 100644 python/datafusion/_internal/object_store/__init__.pyi delete mode 100644 python/datafusion/_internal/substrait/__init__.pyi diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 399ef07cf..2d8db42c8 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -24,7 +24,7 @@ try: import importlib.metadata as importlib_metadata except ImportError: - import importlib_metadata # type: ignore + import importlib_metadata from .context import ( SessionContext, diff --git a/python/datafusion/_internal/__init__.pyi b/python/datafusion/_internal/__init__.pyi deleted file mode 100644 index 773f5c45e..000000000 --- a/python/datafusion/_internal/__init__.pyi +++ /dev/null @@ -1,353 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set, Tuple -import pyarrow as pa -from pyarrow.dataset import Dataset -import pandas as pd -import polars as pl -from ..udf import Accumulator, WindowEvaluator -from ..context import ArrowStreamExportable, TableProviderExportable -from .expr import SortExpr, Expr - -class Catalog: - def names(self) -> List[str]: ... - def database(self, name: str = "public") -> Database: ... - -class Database: - def names(self) -> Set[str]: ... - def table(self, name: str) -> Table: ... - -class Table: - @property - def schema(self) -> pa.Schema: ... - @property - def kind(self) -> str: ... - -class SessionConfig: - def __init__(self, config_options: Optional[Dict[str, str]] = None) -> None: ... - def with_create_default_catalog_and_schema( - self, enabled: bool - ) -> SessionConfig: ... - def with_default_catalog_and_schema( - self, catalog: str, schema: str - ) -> SessionConfig: ... - def with_information_schema(self, enabled: bool) -> SessionConfig: ... - def with_batch_size(self, batch_size: int) -> SessionConfig: ... - def with_target_partitions(self, target_partitions: int) -> SessionConfig: ... - def with_repartition_aggregations(self, enabled: bool) -> SessionConfig: ... - def with_repartition_joins(self, enabled: bool) -> SessionConfig: ... - def with_repartition_windows(self, enabled: bool) -> SessionConfig: ... - def with_repartition_sorts(self, enabled: bool) -> SessionConfig: ... - def with_repartition_file_scans(self, enabled: bool) -> SessionConfig: ... - def with_repartition_file_min_size(self, size: int) -> SessionConfig: ... - def with_parquet_pruning(self, enabled: bool) -> SessionConfig: ... - def set(self, key: str, value: str) -> SessionConfig: ... - -class RuntimeEnvBuilder: - def __init__(self) -> None: ... - def with_disk_manager_disabled(self) -> RuntimeEnvBuilder: ... - def with_disk_manager_os(self) -> RuntimeEnvBuilder: ... - def with_disk_manager_specified(self, paths: List[str]) -> RuntimeEnvBuilder: ... - def with_unbounded_memory_pool(self) -> RuntimeEnvBuilder: ... - def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder: ... - def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder: ... - def with_temp_file_path(self, path: str) -> RuntimeEnvBuilder: ... - -class SQLOptions: - def __init__(self) -> None: ... - def with_allow_ddl(self, allow: bool) -> SQLOptions: ... - def with_allow_dml(self, allow: bool) -> SQLOptions: ... - def with_allow_statements(self, allow: bool) -> SQLOptions: ... - -class SessionContext: - def __init__( - self, - config: Optional[SessionConfig] = None, - runtime: Optional[RuntimeEnvBuilder] = None, - ) -> None: ... - def enable_url_table(self) -> SessionContext: ... - def register_object_store( - self, schema: str, storage: Any, host: Optional[str] = None - ): ... - def register_listing_table( - self, - name: str, - path: str, - table_partition_cols: List[Tuple[str, str]] = [], - file_extension: str = ".parquet", - schema: Optional[pa.Schema] = None, - file_sort_order: Optional[List[List[SortExpr]]] = None, - ): ... - def sql(self, query: str) -> DataFrame: ... - def sql_with_options( - self, query: str, options: Optional[SQLOptions] = None - ) -> DataFrame: ... - def create_dataframe( - self, - partitions: List[List[pa.RecordBatch]], - name: Optional[str] = None, - schema: Optional[pa.Schema] = None, - ) -> DataFrame: ... - def create_dataframe_from_logical_plan(self, plan: LogicalPlan) -> DataFrame: ... - def from_pylist(self, data: list, name: Optional[str] = None) -> DataFrame: ... - def from_pydict(self, data: dict, name: Optional[str] = None) -> DataFrame: ... - def from_arrow( - self, - data: ArrowStreamExportable | pa.RecordBatchReader, - name: Optional[str] = None, - ) -> DataFrame: ... - def from_pandas( - self, data: pd.DataFrame, name: Optional[str] = None - ) -> DataFrame: ... - def from_polars( - self, data: pl.DataFrame, name: Optional[str] = None - ) -> DataFrame: ... - def register_table(self, data: str, table: Table): ... - def deregister_table(self, name: str): ... - def register_table_provider(self, name: str, provider: TableProviderExportable): ... - def register_record_batches( - self, name: str, partitions: List[List[pa.RecordBatch]] - ): ... - def register_parquet( - self, - name: str, - path: str, - table_partition_cols: List[Tuple[str, str]] = [], - parquet_pruning: bool = True, - file_extension: str = ".parquet", - skip_metadata: bool = True, - schema: Optional[pa.Schema] = None, - file_sort_order: Optional[List[List[SortExpr]]] = None, - **kwargs, - ): ... - def register_csv( - self, - name: str, - path: str | List[str], - schema: Optional[pa.Schema] = None, - has_header: bool = True, - delimiter: str = ",", - schema_infer_max_records: int = 1000, - file_extension: str = ".csv", - file_compression_type: Optional[str] = None, - **kwargs, - ): ... - def register_json( - self, - name: str, - path: str | Path, - schema: Optional[pa.Schema] = None, - schema_infer_max_records: int = 1000, - file_extension: str = ".json", - table_partition_cols: List[Tuple[str, str]] = [], - file_compression_type: Optional[str] = None, - **kwargs, - ): ... - def register_avro( - self, - name: str, - path: str | Path, - schema: Optional[pa.Schema] = None, - file_extension: str = ".avro", - table_partition_cols: List[Tuple[str, str]] = [], - ): ... - def register_dataset(self, name: str, dataset: Dataset): ... - def register_udf(self, udf: ScalarUDF): ... - def register_udaf(self, udaf: AggregateUDF): ... - def register_udwf(self, udwf: WindowUDF): ... - def catalog(self, name: str = "datafusion") -> Catalog: ... - def tables(self) -> Set[str]: ... - def table(self, name: str) -> DataFrame: ... - def table_exist(self, name: str) -> bool: ... - def empty_table(self) -> DataFrame: ... - def session_id(self) -> str: ... - def read_json( - self, - path: str | List[str], - schema: Optional[pa.Schema] = None, - schema_infer_max_records: int = 1000, - file_extension: str = ".json", - table_partition_cols: List[Tuple[str, str]] = [], - file_compression_type: Optional[str] = None, - **kwargs, - ): ... - def read_csv( - self, - path: str | List[str], - schema: Optional[pa.Schema] = None, - head_header: bool = True, - delimiter: str = ",", - schema_infer_max_records: int = 1000, - file_extension: str = ".csv", - table_partition_cols: List[Tuple[str, str]] = [], - file_compression_type: Optional[str] = None, - **kwargs, - ): ... - def read_parquet( - self, - path: str | List[str], - table_partition_cols: List[Tuple[str, str]] = [], - parquet_pruning: bool = True, - file_extension: str = ".parquet", - skip_metadata: bool = True, - schema: Optional[pa.Schema] = None, - file_sort_order: Optional[List[List[SortExpr]]] = None, - **kwargs, - ): ... - def read_avro( - self, - path: str, - schema: Optional[pa.Schema] = None, - table_partition_cols: List[Tuple[str, str]] = [], - file_extension: str = ".avro", - **kwargs, - ): ... - def read_table(self, table: Table) -> DataFrame: ... - def execute(self, plan: ExecutionPlan, part: int) -> RecordBatchStream: ... - -class DataFrame: - def __getitem__(self, key: str | List[str] | Tuple[str, ...]) -> DataFrame: ... - def _repr_html_(self) -> str: ... - def describe(self) -> DataFrame: ... - def schema(self) -> pa.Schema: ... - def select_columns(self, *args: str) -> DataFrame: ... - def select(self, *args: Expr) -> DataFrame: ... - def drop(self, *args: str) -> DataFrame: ... - def filter(self, predicate: Expr) -> DataFrame: ... - def with_column(self, name: str, expr: Expr) -> DataFrame: ... - def with_columns(self, exprs: List[Expr]) -> DataFrame: ... - def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: ... - def aggregate(self, group_by: List[Expr], aggs: List[Expr]) -> DataFrame: ... - def sort(self, *exprs: SortExpr) -> DataFrame: ... - def limit(self, count: int, offset: int) -> DataFrame: ... - def collect(self) -> List[pa.RecordBatch]: ... - def cache(self) -> DataFrame: ... - def collect_partitioned(self) -> List[List[pa.RecordBatch]]: ... - def show(self, num: int = 20): ... - def distinct(self) -> DataFrame: ... - def join( - self, right: DataFrame, how: str, left_on: List[str], right_on: List[str] - ) -> DataFrame: ... - def join_on( - self, right: DataFrame, on_exprs: List[Expr], how: str - ) -> DataFrame: ... - def explain(self, verbose: bool = False, analyze: bool = False): ... - def logical_plan(self) -> LogicalPlan: ... - def optimized_logical_plan(self) -> LogicalPlan: ... - def execution_plan(self) -> ExecutionPlan: ... - def repartition(self, num: int) -> DataFrame: ... - def repartition_by_hash(self, *args: Expr, num: int) -> DataFrame: ... - def union(self, py_df: DataFrame, distinct: bool = False) -> DataFrame: ... - def union_distinct(self, py_df: DataFrame) -> DataFrame: ... - def unnest_column(self, column: str, preserve_nulls: bool = True) -> DataFrame: ... - def unnest_columns( - self, columns: List[str], preserve_nulls: bool = True - ) -> DataFrame: ... - def intersect(self, py_df: DataFrame) -> DataFrame: ... - def except_all(self, py_df: DataFrame) -> DataFrame: ... - def write_csv(self, path: str, with_header: bool): ... - def write_parquet( - self, - path: str, - compression: str = "uncompressed", - compression_level: Optional[int] = None, - ): ... - def write_json(self, path: str): ... - def to_arrow_table(self) -> pa.Table: ... - def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ... - def execute_stream(self) -> RecordBatchStream: ... - def execute_stream_partitioned(self) -> List[RecordBatchStream]: ... - def to_pandas(self) -> pd.DataFrame: ... - def to_pylist(self) -> list: ... - def to_pydict(self) -> dict: ... - def to_polars(self) -> pl.DataFrame: ... - def count(self) -> int: ... - -class ScalarUDF: - def __init__( - self, - name: str, - func: Callable[..., pa.DataType], - input_types: List[pa.DataType], - return_type: pa.DataType, - volatility: str, - ) -> None: ... - def __call__(self, *args: Expr) -> Expr: ... - -class AggregateUDF: - def __init__( - self, - name: str, - accumulator: Callable[[], Accumulator], - input_types: List[pa.DataType], - return_type: pa.DataType, - state_type: List[pa.DataType], - volatility: str, - ) -> None: ... - def __call__(self, *args: Expr) -> Expr: ... - -class WindowUDF: - def __init__( - self, - name: str, - evaluator: Callable[[], WindowEvaluator], - input_types: List[pa.DataType], - return_type: pa.DataType, - volatility: str, - ) -> None: ... - def __call__(self, *args: Expr) -> Expr: ... - -class Config: - def __init__(self) -> None: ... - @staticmethod - def from_env() -> Config: ... - def get(self, key: str) -> Optional[str]: ... - def set(self, key: str, value: object): ... - def get_all(self) -> Dict[str, Optional[str]]: ... - -class LogicalPlan: - def to_variant(self) -> Any: ... - def inputs(self) -> List[LogicalPlan]: ... - def display(self) -> str: ... - def display_indent(self) -> str: ... - def display_indent_schema(self) -> str: ... - def display_graphviz(self) -> str: ... - def to_proto(self) -> bytes: ... - @staticmethod - def from_proto(ctx: SessionContext, proto_msg: bytes) -> LogicalPlan: ... - -class ExecutionPlan: - def children(self) -> List[ExecutionPlan]: ... - def display(self) -> str: ... - def display_indent(self) -> str: ... - def to_proto(self) -> bytes: ... - @staticmethod - def from_proto(ctx: SessionContext, proto_msg: bytes) -> ExecutionPlan: ... - @property - def partition_count(self) -> int: ... - -class RecordBatch: - def to_pyarrow(self) -> pa.RecordBatch: ... - -class RecordBatchStream: - def next(self) -> RecordBatch: ... - def __next__(self) -> RecordBatch: ... - async def __anext__(self) -> RecordBatch: ... - def __iter__(self) -> RecordBatch: ... - async def __aiter__(self) -> RecordBatch: ... diff --git a/python/datafusion/_internal/common.pyi b/python/datafusion/_internal/common.pyi deleted file mode 100644 index bbb0226c6..000000000 --- a/python/datafusion/_internal/common.pyi +++ /dev/null @@ -1,205 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import enum -from typing import List, Optional, Tuple - -class DFSchema: - @staticmethod - def empty() -> DFSchema: ... - def field_names(self) -> List[str]: ... - -class DataType: ... - -class RexType(enum.IntEnum): - Alias = 0 - Literal = 1 - Call = 2 - Reference = 3 - ScalarSubquery = 4 - Other = 5 - -class PythonType(enum.IntEnum): - Array = 0 - Bool = 1 - Bytes = 2 - Datetime = 3 - Float = 4 - Int = 5 - List = 6 - None_ = 7 - Object = 8 - Str = 9 - -class SqlType(enum.IntEnum): - ANY = 0 - ARRAY = 1 - BIGINT = 2 - BINARY = 3 - BOOLEAN = 4 - CHAR = 5 - COLUMN_LIST = 6 - CURSOR = 7 - DATE = 8 - DECIMAL = 9 - DISTINCT = 10 - DOUBLE = 11 - DYNAMIC_STAR = 12 - FLOAT = 13 - GEOMETRY = 14 - INTEGER = 15 - INTERVAL = 16 - INTERVAL_DAY = 17 - INTERVAL_DAY_HOUR = 18 - INTERVAL_DAY_MINUTE = 19 - INTERVAL_DAY_SECOND = 20 - INTERVAL_HOUR = 21 - INTERVAL_HOUR_MINUTE = 22 - INTERVAL_HOUR_SECOND = 23 - INTERVAL_MINUTE = 24 - INTERVAL_MINUTE_SECOND = 25 - INTERVAL_MONTH = 26 - INTERVAL_SECOND = 27 - INTERVAL_YEAR = 28 - INTERVAL_YEAR_MONTH = 29 - MAP = 30 - MULTISET = 31 - NULL = 32 - OTHER = 33 - REAL = 34 - ROW = 35 - SARG = 36 - SMALLINT = 37 - STRUCTURED = 38 - SYMBOL = 39 - TIME = 40 - TIME_WITH_LOCAL_TIME_ZONE = 41 - TIMESTAMP = 42 - TIMESTAMP_WITH_LOCAL_TIME_ZONE = 43 - TINYINT = 44 - UNKNOWN = 45 - VARBINARY = 46 - VARCHAR = 47 - -class DataTypeMap: - def __init__( - self, arrow_type: DataType, python_type: PythonType, sql_type: SqlType - ) -> None: ... - @staticmethod - def from_parquet_type_str(parquet_str_type: str) -> DataTypeMap: ... - @staticmethod - def arrow(arrow_type: DataType) -> DataTypeMap: ... - @staticmethod - def arrow_str(arrow_type_str: str) -> DataTypeMap: ... - @staticmethod - def sql(sql_type: SqlType) -> DataTypeMap: ... - def friendly_arrow_type_name(self) -> str: ... - @property - def arrow_type(self) -> DataType: ... - @arrow_type.setter - def arrow_type(self, arrow_type: DataType): ... - @property - def python_type(self) -> PythonType: ... - @python_type.setter - def python_type(self, python_type: PythonType): ... - @property - def sql_type(self) -> SqlType: ... - @sql_type.setter - def sql_type(self, sql_type: SqlType): ... - -class NullTreatment(enum.IntEnum): - IGNORE_NULLS = 0 - RESPECT_NULLS = 1 - -class SqlSchema: - def __init__(self, schema_name: str) -> None: ... - def table_by_name(self, table_name: str) -> Optional[SqlTable]: ... - def add_table(self, table: SqlTable): ... - def drop_table(self, table_name: str): ... - @property - def name(self) -> str: ... - @name.setter - def name(self, name: str): ... - @property - def tables(self) -> List[SqlTable]: ... - @tables.setter - def tables(self, tables: List[SqlTable]): ... - @property - def views(self) -> List[SqlView]: ... - @views.setter - def views(self, views: List[SqlView]): ... - @property - def functions(self) -> List[SqlFunction]: ... - @functions.setter - def functions(self, functions: List[SqlFunction]): ... - -class SqlTable: - def __init__( - self, - table_name: str, - columns: List[Tuple[str, DataTypeMap]], - row_count: int, - filepaths: Optional[List[str]] = None, - ) -> None: ... - @property - def name(self) -> str: ... - @name.setter - def name(self, name: str): ... - @property - def columns(self) -> List[Tuple[str, DataTypeMap]]: ... - @columns.setter - def columns(self, columns: List[Tuple[str, DataTypeMap]]): ... - @property - def primary_key(self) -> Optional[str]: ... - @primary_key.setter - def primary_key(self, primary_key: Optional[str]): ... - @property - def foreign_keys(self) -> List[str]: ... - @foreign_keys.setter - def foreign_keys(self, foreign_keys: List[str]): ... - @property - def indexes(self) -> List[str]: ... - @indexes.setter - def indexes(self, indexes: List[str]): ... - @property - def constraints(self) -> List[str]: ... - @constraints.setter - def constraints(self, constraints: List[str]): ... - @property - def statistics(self) -> SqlStatistics: ... - @statistics.setter - def statistics(self, statistics: SqlStatistics): ... - @property - def filepaths(self) -> Optional[List[str]]: ... - @filepaths.setter - def filepaths(self, filepaths: Optional[List[str]]): ... - -class SqlView: - @property - def name(self) -> str: ... - @name.setter - def name(self, name: str): ... - @property - def definition(self) -> str: ... - @definition.setter - def definition(self, definition: str): ... - -class SqlStatistics: - def __init__(self, row_count: float) -> None: ... - def getRowCount(self) -> float: ... - -class SqlFunction: ... diff --git a/python/datafusion/_internal/expr/__init__.pyi b/python/datafusion/_internal/expr/__init__.pyi deleted file mode 100644 index ba7cc9101..000000000 --- a/python/datafusion/_internal/expr/__init__.pyi +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr as Expr, ExprFuncBuilder as ExprFuncBuilder -from .column import Column as Column -from .binary_expr import BinaryExpr as BinaryExpr -from .literal import Literal as Literal -from .aggregate_expr import AggregateFunction as AggregateFunction -from .bool_expr import ( - Not as Not, - IsNotNull as IsNotNull, - IsNull as IsNull, - IsTrue as IsTrue, - IsFalse as IsFalse, - IsUnknown as IsUnknown, - IsNotTrue as IsNotTrue, - IsNotFalse as IsNotFalse, - IsNotUnknown as IsNotUnknown, - Negative as Negative, -) -from .like import Like as Like, ILike as ILike, SimilarTo as SimilarTo -from .scalar_variable import ScalarVariable as ScalarVariable -from .alias import Alias as Alias -from .in_list import InList as InList -from .exists import Exists as Exists -from .subquery import Subquery as Subquery -from .in_subquery import InSubquery as InSubquery -from .scalar_subquery import ScalarSubquery as ScalarSubquery -from .placeholder import Placeholder as Placeholder -from .grouping_set import GroupingSet as GroupingSet -from .case import Case as Case -from .conditional_expr import CaseBuilder as CaseBuilder -from .cast import Cast as Cast, TryCast as TryCast -from .between import Between as Between -from .explain import Explain as Explain -from .limit import Limit as Limit -from .aggregate import Aggregate as Aggregate -from .sort import Sort as Sort -from .analyze import Analyze as Analyze -from .empty_relation import EmptyRelation as EmptyRelation -from .join import Join as Join, JoinType as JoinType, JoinConstraint as JoinConstraint -from .union import Union as Union -from .unnest import Unnest as Unnest -from .unnest_expr import UnnestExpr as UnnestExpr -from .extension import Extension as Extension -from .filter import Filter as Filter -from .projection import Projection as Projection -from .table_scan import TableScan as TableScan -from .create_memory_table import CreateMemoryTable as CreateMemoryTable -from .create_view import CreateView as CreateView -from .distinct import Distinct as Distinct -from .sort_expr import SortExpr as SortExpr -from .subquery_alias import SubqueryAlias as SubqueryAlias -from .drop_table import DropTable as DropTable -from .repartition import Partitioning as Partitioning, Repartition as Repartition -from .window import ( - WindowExpr as WindowExpr, - WindowFrame as WindowFrame, - WindowFrameBound as WindowFrameBound, -) diff --git a/python/datafusion/_internal/expr/aggregate.pyi b/python/datafusion/_internal/expr/aggregate.pyi deleted file mode 100644 index c6fe8a0b3..000000000 --- a/python/datafusion/_internal/expr/aggregate.pyi +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema -from .base import Expr - -class Aggregate: - def group_by_exprs(self) -> List[Expr]: ... - def aggregate_exprs(self) -> List[Expr]: ... - def agg_expressions(self) -> List[Expr]: ... - def agg_func_name(self, expr: Expr) -> str: ... - def aggregation_arguments(self, expr: Expr) -> List[Expr]: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/aggregate_expr.pyi b/python/datafusion/_internal/expr/aggregate_expr.pyi deleted file mode 100644 index b093e1b42..000000000 --- a/python/datafusion/_internal/expr/aggregate_expr.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .base import Expr - -class AggregateFunction: - def aggregate_type(self) -> str: ... - def is_distinct(self) -> bool: ... - def args(self) -> List[Expr]: ... diff --git a/python/datafusion/_internal/expr/alias.pyi b/python/datafusion/_internal/expr/alias.pyi deleted file mode 100644 index af9d4fb3d..000000000 --- a/python/datafusion/_internal/expr/alias.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class Alias: - def alias(self) -> str: ... - def expr(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/analyze.pyi b/python/datafusion/_internal/expr/analyze.pyi deleted file mode 100644 index 979edd2ac..000000000 --- a/python/datafusion/_internal/expr/analyze.pyi +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema - -class Analyze: - def verbose(self) -> bool: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/base.pyi b/python/datafusion/_internal/expr/base.pyi deleted file mode 100644 index a91914a05..000000000 --- a/python/datafusion/_internal/expr/base.pyi +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, List, Optional -import pyarrow as pa - -from ..common import RexType, DataTypeMap, NullTreatment -from .. import LogicalPlan -from .window import WindowFrame -from .sort_expr import SortExpr - -class Expr: - def to_variant(self) -> Any: ... - def schema_name(self) -> str: ... - def canonical_name(self) -> str: ... - def variant_name(self) -> str: ... - def __richcmp__(self, other: Expr, op: int) -> Expr: ... - def __add__(self, rhs: Expr) -> Expr: ... - def __sub__(self, rhs: Expr) -> Expr: ... - def __truediv__(self, rhs: Expr) -> Expr: ... - def __mul__(self, rhs: Expr) -> Expr: ... - def __mod__(self, rhs: Expr) -> Expr: ... - def __and__(self, rhs: Expr) -> Expr: ... - def __or__(self, rhs: Expr) -> Expr: ... - def __invert__(self) -> Expr: ... - def __getitem__(self, key: str) -> Expr: ... - @staticmethod - def literal(value: Any) -> Expr: ... - @staticmethod - def column(value: str) -> Expr: ... - def alias(self, name: str) -> Expr: ... - def sort(self, ascending: bool = True, nulls_first: bool = True) -> Expr: ... - def is_null(self) -> Expr: ... - def is_not_null(self) -> Expr: ... - def cast(self, to: pa.DataType) -> Expr: ... - def between(self, low: Expr, high: Expr, negated: bool = False) -> Expr: ... - def rex_type(self) -> RexType: ... - def types(self) -> DataTypeMap: ... - def python_value(self) -> Any: ... - def rex_call_operands(self) -> List[Expr]: ... - def rex_call_operator(self) -> str: ... - def column_name(self, plan: LogicalPlan) -> str: ... - def order_by(self, order_by: List[SortExpr]) -> ExprFuncBuilder: ... - def filter(self, filter: Expr) -> ExprFuncBuilder: ... - def distinct(self) -> ExprFuncBuilder: ... - def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: ... - def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... - def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... - def over( - self, - partition_by: Optional[List[Expr]] = None, - window_frame: Optional[WindowFrame] = None, - order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[NullTreatment] = None, - ) -> Expr: ... - -class ExprFuncBuilder: - def order_by(self, order_by: List[SortExpr]) -> ExprFuncBuilder: ... - def filter(self, filter: Expr) -> ExprFuncBuilder: ... - def distinct(self) -> ExprFuncBuilder: ... - def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: ... - def partition_by(self, partition_by: List[Expr]) -> ExprFuncBuilder: ... - def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: ... - def build(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/between.pyi b/python/datafusion/_internal/expr/between.pyi deleted file mode 100644 index a693225c4..000000000 --- a/python/datafusion/_internal/expr/between.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class Between: - def expr(self) -> Expr: ... - def negated(self) -> bool: ... - def low(self) -> Expr: ... - def high(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/binary_expr.pyi b/python/datafusion/_internal/expr/binary_expr.pyi deleted file mode 100644 index bb4f11e32..000000000 --- a/python/datafusion/_internal/expr/binary_expr.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class BinaryExpr: - def left(self) -> Expr: ... - def right(self) -> Expr: ... - def on(self) -> str: ... diff --git a/python/datafusion/_internal/expr/bool_expr.pyi b/python/datafusion/_internal/expr/bool_expr.pyi deleted file mode 100644 index 3d0f1d846..000000000 --- a/python/datafusion/_internal/expr/bool_expr.pyi +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class Not: - def expr(self) -> Expr: ... - -class IsNotNull: - def expr(self) -> Expr: ... - -class IsNull: - def expr(self) -> Expr: ... - -class IsTrue: - def expr(self) -> Expr: ... - -class IsFalse: - def expr(self) -> Expr: ... - -class IsUnknown: - def expr(self) -> Expr: ... - -class IsNotTrue: - def expr(self) -> Expr: ... - -class IsNotFalse: - def expr(self) -> Expr: ... - -class IsNotUnknown: - def expr(self) -> Expr: ... - -class Negative: - def expr(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/case.pyi b/python/datafusion/_internal/expr/case.pyi deleted file mode 100644 index 9c6cd8ef9..000000000 --- a/python/datafusion/_internal/expr/case.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional, Tuple -from .base import Expr - -class Case: - def expr(self) -> Optional[Expr]: ... - def when_then_expr(self) -> List[Tuple[Expr, Expr]]: ... - def else_expr(self) -> Optional[Expr]: ... diff --git a/python/datafusion/_internal/expr/cast.pyi b/python/datafusion/_internal/expr/cast.pyi deleted file mode 100644 index 68d450d71..000000000 --- a/python/datafusion/_internal/expr/cast.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr -from ..common import DataType - -class Cast: - def expr(self) -> Expr: ... - def data_type(self) -> DataType: ... - -class TryCast: - def expr(self) -> Expr: ... - def data_type(self) -> DataType: ... diff --git a/python/datafusion/_internal/expr/column.pyi b/python/datafusion/_internal/expr/column.pyi deleted file mode 100644 index d7d0ec167..000000000 --- a/python/datafusion/_internal/expr/column.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Optional - -class Column: - def name(self) -> str: ... - def relation(self) -> Optional[str]: ... - def qualified_name(self) -> str: ... diff --git a/python/datafusion/_internal/expr/conditional_expr.pyi b/python/datafusion/_internal/expr/conditional_expr.pyi deleted file mode 100644 index 81d09528b..000000000 --- a/python/datafusion/_internal/expr/conditional_expr.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class CaseBuilder: - def when(self, when: Expr, then: Expr) -> CaseBuilder: ... - def otherwise(self, else_expr: Expr) -> Expr: ... - def end(self) -> Expr: ... diff --git a/python/datafusion/_internal/expr/create_memory_table.pyi b/python/datafusion/_internal/expr/create_memory_table.pyi deleted file mode 100644 index 60bfa8c44..000000000 --- a/python/datafusion/_internal/expr/create_memory_table.pyi +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan - -class CreateMemoryTable: - def name(self) -> str: ... - def input(self) -> List[LogicalPlan]: ... - def if_not_exists(self) -> bool: ... - def or_replace(self) -> bool: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/create_view.pyi b/python/datafusion/_internal/expr/create_view.pyi deleted file mode 100644 index 01304d831..000000000 --- a/python/datafusion/_internal/expr/create_view.pyi +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional -from .. import LogicalPlan - -class CreateView: - def name(self) -> str: ... - def input(self) -> List[LogicalPlan]: ... - def or_replace(self) -> bool: ... - def definition(self) -> Optional[str]: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/distinct.pyi b/python/datafusion/_internal/expr/distinct.pyi deleted file mode 100644 index 3041451dc..000000000 --- a/python/datafusion/_internal/expr/distinct.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan - -class Distinct: - def input(self) -> List[LogicalPlan]: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/drop_table.pyi b/python/datafusion/_internal/expr/drop_table.pyi deleted file mode 100644 index 0df01b49c..000000000 --- a/python/datafusion/_internal/expr/drop_table.pyi +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan - -class DropTable: - def name(self) -> str: ... - def input(self) -> List[LogicalPlan]: ... - def if_exists(self) -> bool: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/empty_relation.pyi b/python/datafusion/_internal/expr/empty_relation.pyi deleted file mode 100644 index f062d167e..000000000 --- a/python/datafusion/_internal/expr/empty_relation.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from ..common import DFSchema - -class EmptyRelation: - def produce_one_row(self) -> bool: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/exists.pyi b/python/datafusion/_internal/expr/exists.pyi deleted file mode 100644 index 9395398d7..000000000 --- a/python/datafusion/_internal/expr/exists.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .subquery import Subquery - -class Exists: - def subquery(self) -> Subquery: ... - def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/explain.pyi b/python/datafusion/_internal/expr/explain.pyi deleted file mode 100644 index de1bbcb6e..000000000 --- a/python/datafusion/_internal/expr/explain.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema - -class Explain: - def explain_string(self) -> List[str]: ... - def verbose(self) -> bool: ... - def plan(self) -> LogicalPlan: ... - def schema(self) -> DFSchema: ... - def logical_optimization_succceeded(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/extension.pyi b/python/datafusion/_internal/expr/extension.pyi deleted file mode 100644 index 552d9ed1f..000000000 --- a/python/datafusion/_internal/expr/extension.pyi +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -class Extension: - def name(self) -> str: ... diff --git a/python/datafusion/_internal/expr/filter.pyi b/python/datafusion/_internal/expr/filter.pyi deleted file mode 100644 index c9f9678e1..000000000 --- a/python/datafusion/_internal/expr/filter.pyi +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .base import Expr -from .. import LogicalPlan -from ..common import DFSchema - -class Filter: - def predicate(self) -> Expr: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/grouping_set.pyi b/python/datafusion/_internal/expr/grouping_set.pyi deleted file mode 100644 index 2f46b09c9..000000000 --- a/python/datafusion/_internal/expr/grouping_set.pyi +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -class GroupingSet: ... diff --git a/python/datafusion/_internal/expr/in_list.pyi b/python/datafusion/_internal/expr/in_list.pyi deleted file mode 100644 index 9ef53b7ca..000000000 --- a/python/datafusion/_internal/expr/in_list.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .base import Expr - -class InList: - def expr(self) -> Expr: ... - def list(self) -> List[Expr]: ... - def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/in_subquery.pyi b/python/datafusion/_internal/expr/in_subquery.pyi deleted file mode 100644 index 97d7c10be..000000000 --- a/python/datafusion/_internal/expr/in_subquery.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr -from .subquery import Subquery - -class InSubquery: - def expr(self) -> Expr: ... - def subquery(self) -> Subquery: ... - def negated(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/join.pyi b/python/datafusion/_internal/expr/join.pyi deleted file mode 100644 index 5827dc7f6..000000000 --- a/python/datafusion/_internal/expr/join.pyi +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional, Tuple -from .. import LogicalPlan -from .base import Expr -from ..common import DFSchema - -class JoinType: - def is_outer(self) -> bool: ... - -class JoinConstraint: ... - -class Join: - def left(self) -> LogicalPlan: ... - def right(self) -> LogicalPlan: ... - def on(self) -> List[Tuple[Expr, Expr]]: ... - def filter(self) -> Optional[Expr]: ... - def join_type(self) -> JoinType: ... - def join_constraint(self) -> JoinConstraint: ... - def schema(self) -> DFSchema: ... - def null_equals_null(self) -> bool: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/like.pyi b/python/datafusion/_internal/expr/like.pyi deleted file mode 100644 index d2e81eb7c..000000000 --- a/python/datafusion/_internal/expr/like.pyi +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Optional -from .base import Expr - -class Like: - def negated(self) -> bool: ... - def expr(self) -> Expr: ... - def pattern(self) -> Expr: ... - def escape_char(self) -> Optional[str]: ... - -class ILike: - def negated(self) -> bool: ... - def expr(self) -> Expr: ... - def pattern(self) -> Expr: ... - def escape_char(self) -> Optional[str]: ... - -class SimilarTo: - def negated(self) -> bool: ... - def expr(self) -> Expr: ... - def pattern(self) -> Expr: ... - def escape_char(self) -> Optional[str]: ... diff --git a/python/datafusion/_internal/expr/limit.pyi b/python/datafusion/_internal/expr/limit.pyi deleted file mode 100644 index 482e1e6a1..000000000 --- a/python/datafusion/_internal/expr/limit.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema - -class Limit: - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/literal.pyi b/python/datafusion/_internal/expr/literal.pyi deleted file mode 100644 index 478990bee..000000000 --- a/python/datafusion/_internal/expr/literal.pyi +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Optional, Tuple - -class Literal: - def data_type(self) -> str: ... - def value_f32(self) -> Optional[float]: ... - def value_f64(self) -> Optional[float]: ... - def value_decimal128(self) -> Tuple[Optional[int], int, int]: ... - def value_i8(self) -> Optional[int]: ... - def value_i16(self) -> Optional[int]: ... - def value_i32(self) -> Optional[int]: ... - def value_i64(self) -> Optional[int]: ... - def value_u8(self) -> Optional[int]: ... - def value_u16(self) -> Optional[int]: ... - def value_u32(self) -> Optional[int]: ... - def value_u64(self) -> Optional[int]: ... - def value_date32(self) -> Optional[int]: ... - def value_date64(self) -> Optional[int]: ... - def value_time64(self) -> Optional[int]: ... - def value_timestamp(self) -> Tuple[Optional[int], Optional[int]]: ... - def value_bool(self) -> Optional[bool]: ... - def value_string(self) -> Optional[str]: ... - def value_interval_day_time(self) -> Optional[Tuple[int, int]]: ... - def into_type(self) -> Literal: ... diff --git a/python/datafusion/_internal/expr/placeholder.pyi b/python/datafusion/_internal/expr/placeholder.pyi deleted file mode 100644 index 368bb4692..000000000 --- a/python/datafusion/_internal/expr/placeholder.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from ..common import DataType - -class Placeholder: - def id(self) -> str: ... - def data_type(self) -> DataType: ... diff --git a/python/datafusion/_internal/expr/projection.pyi b/python/datafusion/_internal/expr/projection.pyi deleted file mode 100644 index 8964b7c7c..000000000 --- a/python/datafusion/_internal/expr/projection.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .base import Expr -from .. import LogicalPlan -from ..common import DFSchema - -class Projection: - def projections(self) -> List[Expr]: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/repartition.pyi b/python/datafusion/_internal/expr/repartition.pyi deleted file mode 100644 index ccb148ee1..000000000 --- a/python/datafusion/_internal/expr/repartition.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from .base import Expr - -class Repartition: - def input(self) -> List[LogicalPlan]: ... - def partitioning_scheme(self) -> Partitioning: ... - def distribute_list(self) -> List[Expr]: ... - def distribute_columns(self) -> str: ... - def __name__(self) -> str: ... - -class Partitioning: ... diff --git a/python/datafusion/_internal/expr/scalar_subquery.pyi b/python/datafusion/_internal/expr/scalar_subquery.pyi deleted file mode 100644 index c7dc6aa49..000000000 --- a/python/datafusion/_internal/expr/scalar_subquery.pyi +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .subquery import Subquery - -class ScalarSubquery: - def subquery(self) -> Subquery: ... diff --git a/python/datafusion/_internal/expr/scalar_variable.pyi b/python/datafusion/_internal/expr/scalar_variable.pyi deleted file mode 100644 index 8cbf25873..000000000 --- a/python/datafusion/_internal/expr/scalar_variable.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from ..common import DataType - -class ScalarVariable: - def data_type(self) -> DataType: ... - def variables(self) -> List[str]: ... diff --git a/python/datafusion/_internal/expr/sort.pyi b/python/datafusion/_internal/expr/sort.pyi deleted file mode 100644 index a0fe8f532..000000000 --- a/python/datafusion/_internal/expr/sort.pyi +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional - -from .. import LogicalPlan -from ..common import DFSchema -from .sort_expr import SortExpr - -class Sort: - def sort_exprs(self) -> List[SortExpr]: ... - def get_fetch_val(self) -> Optional[int]: ... - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... diff --git a/python/datafusion/_internal/expr/sort_expr.pyi b/python/datafusion/_internal/expr/sort_expr.pyi deleted file mode 100644 index 669fbafe3..000000000 --- a/python/datafusion/_internal/expr/sort_expr.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class SortExpr: - def __init__( - self, - expr: Expr, - asc: bool, - nulls_first: bool, - ) -> None: ... - def expr(self) -> Expr: ... - def ascending(self) -> bool: ... - def nulls_first(self) -> bool: ... diff --git a/python/datafusion/_internal/expr/subquery.pyi b/python/datafusion/_internal/expr/subquery.pyi deleted file mode 100644 index 4ef8e696c..000000000 --- a/python/datafusion/_internal/expr/subquery.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan - -class Subquery: - def input(self) -> List[LogicalPlan]: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/subquery_alias.pyi b/python/datafusion/_internal/expr/subquery_alias.pyi deleted file mode 100644 index 5905f7d93..000000000 --- a/python/datafusion/_internal/expr/subquery_alias.pyi +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema - -class SubqueryAlias: - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def alias(self) -> str: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/table_scan.pyi b/python/datafusion/_internal/expr/table_scan.pyi deleted file mode 100644 index fadfb3ab0..000000000 --- a/python/datafusion/_internal/expr/table_scan.pyi +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional, Tuple -from ..common import DFSchema -from .base import Expr - -class TableScan: - def table_name(self) -> str: ... - def fqn(self) -> Tuple[Optional[str], Optional[str], str]: ... - def projection(self) -> List[Tuple[int, str]]: ... - def schema(self) -> DFSchema: ... - def filters(self) -> List[Expr]: ... - def fetch(self) -> Optional[int]: ... diff --git a/python/datafusion/_internal/expr/union.pyi b/python/datafusion/_internal/expr/union.pyi deleted file mode 100644 index 4d4c63da3..000000000 --- a/python/datafusion/_internal/expr/union.pyi +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema - -class Union: - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/unnest.pyi b/python/datafusion/_internal/expr/unnest.pyi deleted file mode 100644 index a6d7d4af5..000000000 --- a/python/datafusion/_internal/expr/unnest.pyi +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List -from .. import LogicalPlan -from ..common import DFSchema - -class Unnest: - def input(self) -> List[LogicalPlan]: ... - def schema(self) -> DFSchema: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/unnest_expr.pyi b/python/datafusion/_internal/expr/unnest_expr.pyi deleted file mode 100644 index ea787a00d..000000000 --- a/python/datafusion/_internal/expr/unnest_expr.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .base import Expr - -class UnnestExpr: - def expr(self) -> Expr: ... - def __name__(self) -> str: ... diff --git a/python/datafusion/_internal/expr/window.pyi b/python/datafusion/_internal/expr/window.pyi deleted file mode 100644 index f47438d0f..000000000 --- a/python/datafusion/_internal/expr/window.pyi +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, List, Optional -from ..common import DFSchema -from .base import Expr -from .sort_expr import SortExpr - -class WindowExpr: - def schema(self) -> DFSchema: ... - def get_window_expr(self) -> List[Expr]: ... - def get_sort_exprs(self, expr: Expr) -> List[SortExpr]: ... - def get_partition_exprs(self, expr: Expr) -> List[Expr]: ... - def get_args(self, expr: Expr) -> List[Expr]: ... - def window_func_name(self, expr: Expr) -> str: ... - def get_frame(self, expr: Expr) -> Optional[WindowFrame]: ... - -class WindowFrame: - def __init__( - self, - unit: str, - start_bound: Optional[Any], - end_bound: Optional[Any], - ) -> None: ... - def get_frame_units(self) -> str: ... - def get_lower_bound(self) -> WindowFrameBound: ... - def get_upper_bound(self) -> WindowFrameBound: ... - -class WindowFrameBound: - def is_current_row(self) -> bool: ... - def is_preceding(self) -> bool: ... - def is_following(self) -> bool: ... - def get_offset(self) -> Optional[int]: ... - def is_unbounded(self) -> bool: ... diff --git a/python/datafusion/_internal/functions/__init__.pyi b/python/datafusion/_internal/functions/__init__.pyi deleted file mode 100644 index 9d10bceac..000000000 --- a/python/datafusion/_internal/functions/__init__.pyi +++ /dev/null @@ -1,305 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, List, Optional, Protocol -from ..expr import Expr, SortExpr, CaseBuilder, WindowFrame -from .. import SessionContext - -def in_list(expr: Expr, value: List[Expr], negated: bool) -> Expr: ... -def make_array(exprs: List[Expr]) -> Expr: ... -def array_concat(exprs: List[Expr]) -> Expr: ... -def array_cat(exprs: List[Expr]) -> Expr: ... -def array_position(array: Expr, element: Expr, index: Optional[int] = None) -> Expr: ... -def array_slice( - array: Expr, begin: Expr, end: Expr, stride: Optional[Expr] = None -) -> Expr: ... -def digest(value: Expr, method: Expr) -> Expr: ... -def concat(args: List[Expr]) -> Expr: ... -def concat_ws(sep: str, args: List[Expr]) -> Expr: ... -def regexp_like(values: Expr, regex: Expr, flags: Optional[Expr] = None) -> Expr: ... -def regexp_match(values: Expr, regex: Expr, flags: Optional[Expr] = None) -> Expr: ... -def regexp_replace( - string: Expr, pattern: Expr, replacement: Expr, flags: Optional[Expr] = None -) -> Expr: ... -def order_by(expr: Expr, asc: bool, nulls_first: bool) -> SortExpr: ... -def alias(expr: Expr, name: str) -> Expr: ... -def col(name: str) -> Expr: ... -def case(expr: Expr) -> CaseBuilder: ... -def when(when: Expr, then: Expr) -> CaseBuilder: ... -def window( - name: str, - args: List[Expr], - partition_by: Optional[List[Expr]], - order_by: Optional[List[SortExpr]], - window_frame: Optional[WindowFrame], - ctx: Optional[SessionContext], -) -> Expr: ... -def abs(num: Expr) -> Expr: ... -def acos(num: Expr) -> Expr: ... -def acosh(num: Expr) -> Expr: ... -def ascii(num: Expr) -> Expr: ... -def asin(num: Expr) -> Expr: ... -def asinh(num: Expr) -> Expr: ... -def atan(num: Expr) -> Expr: ... -def atanh(num: Expr) -> Expr: ... -def atan2(y: Expr, x: Expr) -> Expr: ... -def bit_length(arg: Expr) -> Expr: ... -def btrim(*args: Expr) -> Expr: ... -def cbrt(num: Expr) -> Expr: ... -def ceil(num: Expr) -> Expr: ... -def character_length(string: Expr) -> Expr: ... -def length(string: Expr) -> Expr: ... -def char_length(string: Expr) -> Expr: ... -def chr(arg: Expr) -> Expr: ... -def coalesce(*args: Expr) -> Expr: ... -def cos(num: Expr) -> Expr: ... -def cosh(num: Expr) -> Expr: ... -def cot(num: Expr) -> Expr: ... -def degrees(num: Expr) -> Expr: ... -def decode(input: Expr, encoding: Expr) -> Expr: ... -def encode(input: Expr, encoding: Expr) -> Expr: ... -def ends_with(string: Expr, suffix: Expr) -> Expr: ... -def exp(num: Expr) -> Expr: ... -def factorial(num: Expr) -> Expr: ... -def floor(num: Expr) -> Expr: ... -def gcd(x: Expr, y: Expr) -> Expr: ... -def initcap(string: Expr) -> Expr: ... -def isnan(num: Expr) -> Expr: ... -def iszero(num: Expr) -> Expr: ... -def levenshtein(string1: Expr, string2: Expr) -> Expr: ... -def lcm(x: Expr, y: Expr) -> Expr: ... -def left(string: Expr, n: Expr) -> Expr: ... -def ln(num: Expr) -> Expr: ... -def log(base: Expr, num: Expr) -> Expr: ... -def log10(num: Expr) -> Expr: ... -def log2(num: Expr) -> Expr: ... -def lower(arg1: Expr) -> Expr: ... -def lpad(*args: Expr) -> Expr: ... -def ltrim(*args: Expr) -> Expr: ... -def md5(input_arg: Expr) -> Expr: ... -def nanvl(x: Expr, y: Expr) -> Expr: ... -def nvl(x: Expr, y: Expr) -> Expr: ... -def nullif(arg_1: Expr, arg_2: Expr) -> Expr: ... -def octet_length(args: Expr) -> Expr: ... -def overlay(*args: Expr) -> Expr: ... -def pi() -> Expr: ... -def power(base: Expr, exponent: Expr) -> Expr: ... -def radians(num: Expr) -> Expr: ... -def repeat(string: Expr, n: Expr) -> Expr: ... -def replace(string: Expr, from_: Expr, to: Expr) -> Expr: ... -def reverse(string: Expr) -> Expr: ... -def right(string: Expr, n: Expr) -> Expr: ... -def round(*args: Expr) -> Expr: ... -def rpad(*args: Expr) -> Expr: ... -def rtrim(*args: Expr) -> Expr: ... -def sha224(input_arg1: Expr) -> Expr: ... -def sha256(input_arg1: Expr) -> Expr: ... -def sha384(input_arg1: Expr) -> Expr: ... -def sha512(input_arg1: Expr) -> Expr: ... -def signum(num: Expr) -> Expr: ... -def sin(num: Expr) -> Expr: ... -def sinh(num: Expr) -> Expr: ... -def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr: ... -def sqrt(num: Expr) -> Expr: ... -def starts_with(string: Expr, prefix: Expr) -> Expr: ... -def strpos(string: Expr, substring: Expr) -> Expr: ... -def substr(string: Expr, position: Expr) -> Expr: ... -def substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr: ... -def substring(string: Expr, position: Expr, length: Expr) -> Expr: ... -def find_in_set(string: Expr, string_list: Expr) -> Expr: ... -def tan(num: Expr) -> Expr: ... -def tanh(num: Expr) -> Expr: ... -def to_hex(arg1: Expr) -> Expr: ... -def now() -> Expr: ... -def to_timestamp(*args: Expr) -> Expr: ... -def to_timestamp_millis(*args: Expr) -> Expr: ... -def to_timestamp_micros(*args: Expr) -> Expr: ... -def to_timestamp_seconds(*args: Expr) -> Expr: ... -def to_unixtime(*args: Expr) -> Expr: ... -def current_date() -> Expr: ... -def current_time() -> Expr: ... -def date_part(part: Expr, date: Expr) -> Expr: ... -def date_trunc(part: Expr, date: Expr) -> Expr: ... -def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr: ... -def make_date(year: Expr, month: Expr, day: Expr) -> Expr: ... -def translate(string: Expr, from_: Expr, to: Expr) -> Expr: ... -def trim(*args: Expr) -> Expr: ... -def trunc(*args: Expr) -> Expr: ... -def upper(arg1: Expr) -> Expr: ... -def uuid() -> Expr: ... -def struct(*args: Expr) -> Expr: ... -def named_struct(*args: Expr) -> Expr: ... -def from_unixtime(unixtime: Expr) -> Expr: ... -def arrow_typeof(arg_1: Expr) -> Expr: ... -def arrow_cast(arg_1: Expr, datatype: Expr) -> Expr: ... -def random() -> Expr: ... -def array_append(array: Expr, element: Expr) -> Expr: ... -def array_to_string(array: Expr, delimiter: Expr) -> Expr: ... -def array_dims(array: Expr) -> Expr: ... -def array_distinct(array: Expr) -> Expr: ... -def array_element(array: Expr, element: Expr) -> Expr: ... -def array_empty(array: Expr) -> Expr: ... -def array_length(array: Expr) -> Expr: ... -def array_has(first_array: Expr, second_array: Expr) -> Expr: ... -def array_has_all(first_array: Expr, second_array: Expr) -> Expr: ... -def array_has_any(first_array: Expr, second_array: Expr) -> Expr: ... -def array_positions(array: Expr, element: Expr) -> Expr: ... -def array_ndims(array: Expr) -> Expr: ... -def array_prepend(element: Expr, array: Expr) -> Expr: ... -def array_pop_back(array: Expr) -> Expr: ... -def array_pop_front(array: Expr) -> Expr: ... -def array_remove(array: Expr, element: Expr) -> Expr: ... -def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: ... -def array_remove_all(array: Expr, element: Expr) -> Expr: ... -def array_repeat(element: Expr, count: Expr) -> Expr: ... -def array_replace(array: Expr, from_: Expr, to: Expr) -> Expr: ... -def array_replace_n(array: Expr, from_: Expr, to: Expr, mx: Expr) -> Expr: ... -def array_replace_all(array: Expr, from_: Expr, to: Expr) -> Expr: ... -def array_sort(array: Expr, desc: Expr, null_first: Expr) -> Expr: ... -def array_intersect(first_array: Expr, second_array: Expr) -> Expr: ... -def array_union(array1: Expr, array2: Expr) -> Expr: ... -def array_except(first_array: Expr, second_array: Expr) -> Expr: ... -def array_resize(array: Expr, size: Expr, value: Expr) -> Expr: ... -def cardinality(array: Expr) -> Expr: ... -def flatten(array: Expr) -> Expr: ... -def range(start: Expr, stop: Expr, step: Expr) -> Expr: ... - -class AggregateFunction(Protocol): - def __call__( - self, - exp: Expr, - *, - distinct: Optional[bool] = None, - filter: Optional[Expr] = None, - order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[int] = None, - ) -> Expr: ... - -class AggregateFunctionYX(Protocol): - def __call__( - self, - y: Expr, - x: Expr, - *, - distinct: Optional[bool] = None, - filter: Optional[Expr] = None, - order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[int] = None, - ) -> Expr: ... - -array_agg: AggregateFunction -max: AggregateFunction -min: AggregateFunction -avg: AggregateFunction -sum: AggregateFunction -bit_and: AggregateFunction -bit_or: AggregateFunction -bit_xor: AggregateFunction -bool_and: AggregateFunction -bool_or: AggregateFunction -corr: AggregateFunctionYX -count: AggregateFunction -covar_samp: AggregateFunctionYX -covar_pop: AggregateFunctionYX -median: AggregateFunction -regr_slope: AggregateFunctionYX -regr_intercept: AggregateFunctionYX -regr_count: AggregateFunctionYX -regr_r2: AggregateFunctionYX -regr_avgx: AggregateFunctionYX -regr_avgy: AggregateFunctionYX -regr_sxx: AggregateFunctionYX -regr_syy: AggregateFunctionYX -regr_sxy: AggregateFunctionYX -stddev: AggregateFunction -stddev_pop: AggregateFunction -var_sample: AggregateFunction -var_pop: AggregateFunction -approx_distinct: AggregateFunction -approx_median: AggregateFunction - -def approx_percentile_cont( - expression: Expr, - percentile: float, - num_centroids: Optional[int] = None, - filter: Optional[Expr] = None, -) -> Expr: ... -def approx_percentile_cont_with_weight( - expression: Expr, - weight: Expr, - percentile: float, - filter: Optional[Expr] = None, -) -> Expr: ... - -last_value: AggregateFunction - -def first_value( - expr: Expr, - distinct: Optional[bool] = None, - filter: Optional[Expr] = None, - order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[int] = None, -) -> Expr: ... -def nth_value( - expr: Expr, - n: int, - distinct: Optional[bool] = None, - filter: Optional[Expr] = None, - order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[int] = None, -) -> Expr: ... -def string_agg( - expr: Expr, - delimiter: str, - distinct: Optional[bool] = None, - filter: Optional[Expr] = None, - order_by: Optional[List[SortExpr]] = None, - null_treatment: Optional[int] = None, -) -> Expr: ... -def lead( - arg: Expr, - shift_offset: int, - default_value: Optional[Any] = None, - partition_by: Optional[List[Expr]] = None, - order_by: Optional[List[SortExpr]] = None, -) -> Expr: ... -def lag( - arg: Expr, - shift_offset: int, - default_value: Optional[Any] = None, - partition_by: Optional[List[Expr]] = None, - order_by: Optional[List[SortExpr]] = None, -) -> Expr: ... - -class WindowFunction(Protocol): - def __call__( - self, - partition_by: Optional[List[Expr]] = None, - order_by: Optional[List[SortExpr]] = None, - ) -> Expr: ... - -row_number: WindowFunction -rank: WindowFunction -dense_rank: WindowFunction -percent_rank: WindowFunction -cume_dist: WindowFunction - -def ntile( - arg: Expr, - partition_by: Optional[List[Expr]] = None, - order_by: Optional[List[SortExpr]] = None, -) -> Expr: ... diff --git a/python/datafusion/_internal/object_store/__init__.pyi b/python/datafusion/_internal/object_store/__init__.pyi deleted file mode 100644 index 6e82564c7..000000000 --- a/python/datafusion/_internal/object_store/__init__.pyi +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional, Tuple - -class LocalFileSystem: - def __init__(self, prefix: Optional[str] = None) -> None: ... - -class MicrosoftAzure: - def __init__( - self, - container_name: str, - account: Optional[str] = None, - access_key: Optional[str] = None, - bearer_token: Optional[str] = None, - client_id: Optional[str] = None, - client_secret: Optional[str] = None, - tenant_id: Optional[str] = None, - sas_query_pairs: Optional[List[Tuple[str, str]]] = None, - use_emulator: Optional[bool] = None, - allow_http: Optional[bool] = None, - ) -> None: ... - -class GoogleCloud: - def __init__( - self, - bucket_name: str, - service_account_path: Optional[str] = None, - ) -> None: ... - -class AmazonS3: - def __init__( - self, - bucket_name: str, - region: Optional[str] = None, - access_key_id: Optional[str] = None, - secret_access_key: Optional[str] = None, - endpoint: Optional[str] = None, - allow_http: bool = False, - imdsv1_fallback: bool = False, - ) -> None: ... - -class Http: - def __init__(self, url: str) -> None: ... diff --git a/python/datafusion/_internal/substrait/__init__.pyi b/python/datafusion/_internal/substrait/__init__.pyi deleted file mode 100644 index 63aa717b9..000000000 --- a/python/datafusion/_internal/substrait/__init__.pyi +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .. import SessionContext, LogicalPlan - -class Plan: - def encode(self) -> bytes: ... - -class Serde: - @staticmethod - def serialize(sql: str, ctx: SessionContext, path: str): ... - @staticmethod - def serialize_to_plan(sql: str, ctx: SessionContext) -> Plan: ... - @staticmethod - def serialize_bytes(sql: str, ctx: SessionContext) -> bytes: ... - @staticmethod - def deserialize(path: str) -> Plan: ... - @staticmethod - def deserialize_bytes(proto_bytes: bytes) -> Plan: ... - -class Producer: - @staticmethod - def to_substrait_plan(plan: LogicalPlan, ctx: SessionContext) -> Plan: ... - -class Consumer: - @staticmethod - def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: ... diff --git a/python/datafusion/context.py b/python/datafusion/context.py index e84c9e3bc..c9438f0f5 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -30,8 +30,6 @@ from datafusion.record_batch import RecordBatchStream from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF -import pyarrow.dataset - from typing import Any, TYPE_CHECKING, Protocol from typing_extensions import deprecated @@ -489,10 +487,10 @@ def __init__( ctx = SessionContext() df = ctx.read_csv("data.csv") """ - config_internal = config.config_internal if config is not None else None - runtime_internal = runtime.config_internal if runtime is not None else None + config = config.config_internal if config is not None else None + runtime = runtime.config_internal if runtime is not None else None - self.ctx = SessionContextInternal(config_internal, runtime_internal) + self.ctx = SessionContextInternal(config, runtime) def enable_url_table(self) -> "SessionContext": """Control if local files can be queried as tables. @@ -818,13 +816,13 @@ def register_csv( file_compression_type: File compression type. """ if isinstance(path, list): - path_inner = [str(p) for p in path] + path = [str(p) for p in path] else: - path_inner = str(path) + path = str(path) self.ctx.register_csv( name, - path_inner, + path, schema, has_header, delimiter, @@ -1017,11 +1015,11 @@ def read_csv( if table_partition_cols is None: table_partition_cols = [] - path_inner = [str(p) for p in path] if isinstance(path, list) else str(path) + path = [str(p) for p in path] if isinstance(path, list) else str(path) return DataFrame( self.ctx.read_csv( - path_inner, + path, schema, has_header, delimiter, @@ -1064,7 +1062,7 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] - file_sort_order_raw = ( + file_sort_order = ( [sort_list_to_raw_sort_list(f) for f in file_sort_order] if file_sort_order is not None else None @@ -1077,7 +1075,7 @@ def read_parquet( file_extension, skip_metadata, schema, - file_sort_order_raw, + file_sort_order, ) ) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 0c09efc3d..7188ffa92 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -252,9 +252,9 @@ def aggregate( group_by = group_by if isinstance(group_by, list) else [group_by] aggs = aggs if isinstance(aggs, list) else [aggs] - group_by_inner = [e.expr for e in group_by] - aggs_inner = [e.expr for e in aggs] - return DataFrame(self.df.aggregate(group_by_inner, aggs_inner)) + group_by = [e.expr for e in group_by] + aggs = [e.expr for e in aggs] + return DataFrame(self.df.aggregate(group_by, aggs)) def sort(self, *exprs: Expr | SortExpr) -> DataFrame: """Sort the DataFrame by the specified sorting expressions. @@ -452,8 +452,8 @@ def join( raise ValueError( "`left_on` or `right_on` should not provided with `on`" ) - left_on = on # type: ignore - right_on = on # type: ignore + left_on = on + right_on = on elif left_on is not None or right_on is not None: if left_on is None or right_on is None: raise ValueError("`left_on` and `right_on` should both be provided.") @@ -466,7 +466,7 @@ def join( if isinstance(right_on, str): right_on = [right_on] - return DataFrame(self.df.join(right.df, how, left_on, right_on)) # type: ignore + return DataFrame(self.df.join(right.df, how, left_on, right_on)) def join_on( self, @@ -552,8 +552,8 @@ def repartition_by_hash(self, *exprs: Expr, num: int) -> DataFrame: Returns: Repartitioned DataFrame. """ - exprs_inner = [expr.expr for expr in exprs] - return DataFrame(self.df.repartition_by_hash(*exprs_inner, num=num)) + exprs = [expr.expr for expr in exprs] + return DataFrame(self.df.repartition_by_hash(*exprs, num=num)) def union(self, other: DataFrame, distinct: bool = False) -> DataFrame: """Calculate the union of two :py:class:`DataFrame`. @@ -725,10 +725,8 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram Returns: A DataFrame with the columns expanded. """ - columns_inner = [c for c in columns] - return DataFrame( - self.df.unnest_columns(columns_inner, preserve_nulls=preserve_nulls) - ) + columns = [c for c in columns] + return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls)) def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any: """Export an Arrow PyCapsule Stream. diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index e2ec62d93..732c8eece 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -175,10 +175,10 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: def sort_list_to_raw_sort_list( - sort_list: list[Expr | SortExpr], -) -> list[expr_internal.SortExpr]: + sort_list: Optional[list[Expr | SortExpr]], +) -> Optional[list[expr_internal.SortExpr]]: """Helper function to return an optional sort list to raw variant.""" - return [sort_or_default(e) for e in sort_list] + return [sort_or_default(e) for e in sort_list] if sort_list is not None else None class Expr: @@ -306,23 +306,23 @@ def __getitem__(self, key: str | int) -> Expr: ) return Expr(self.expr.__getitem__(key)) - def __eq__(self, rhs: Any) -> Expr: # type: ignore + def __eq__(self, rhs: Any) -> Expr: """Equal to. Accepts either an expression or any valid PyArrow scalar literal value. """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__eq__(rhs.expr)) # type: ignore + return Expr(self.expr.__eq__(rhs.expr)) - def __ne__(self, rhs: Any) -> Expr: # type: ignore + def __ne__(self, rhs: Any) -> Expr: """Not equal to. Accepts either an expression or any valid PyArrow scalar literal value. """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__ne__(rhs.expr)) # type: ignore + return Expr(self.expr.__ne__(rhs.expr)) def __ge__(self, rhs: Any) -> Expr: """Greater than or equal to. @@ -331,7 +331,7 @@ def __ge__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__ge__(rhs.expr)) # type: ignore + return Expr(self.expr.__ge__(rhs.expr)) def __gt__(self, rhs: Any) -> Expr: """Greater than. @@ -340,7 +340,7 @@ def __gt__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__gt__(rhs.expr)) # type: ignore + return Expr(self.expr.__gt__(rhs.expr)) def __le__(self, rhs: Any) -> Expr: """Less than or equal to. @@ -349,7 +349,7 @@ def __le__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__le__(rhs.expr)) # type: ignore + return Expr(self.expr.__le__(rhs.expr)) def __lt__(self, rhs: Any) -> Expr: """Less than. @@ -358,7 +358,7 @@ def __lt__(self, rhs: Any) -> Expr: """ if not isinstance(rhs, Expr): rhs = Expr.literal(rhs) - return Expr(self.expr.__lt__(rhs.expr)) # type: ignore + return Expr(self.expr.__lt__(rhs.expr)) __radd__ = __add__ __rand__ = __and__ @@ -584,9 +584,7 @@ def over(self, window: Window) -> Expr: window: Window definition """ partition_by_raw = expr_list_to_raw_expr_list(window._partition_by) - order_by_raw = ( - sort_list_to_raw_sort_list(window._order_by) if window._order_by else None - ) + order_by_raw = sort_list_to_raw_sort_list(window._order_by) window_frame_raw = ( window._window_frame.window_frame if window._window_frame is not None diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index f8a7418af..8d6176db3 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -332,8 +332,8 @@ def list_join(expr: Expr, delimiter: Expr) -> Expr: def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr: """Returns whether the argument is contained within the list ``values``.""" - values_inner = [v.expr for v in values] - return Expr(f.in_list(arg.expr, values_inner, negated)) + values = [v.expr for v in values] + return Expr(f.in_list(arg.expr, values, negated)) def digest(value: Expr, method: Expr) -> Expr: @@ -350,8 +350,8 @@ def concat(*args: Expr) -> Expr: NULL arguments are ignored. """ - args_inner = [arg.expr for arg in args] - return Expr(f.concat(args_inner)) + args = [arg.expr for arg in args] + return Expr(f.concat(args)) def concat_ws(separator: str, *args: Expr) -> Expr: @@ -359,8 +359,8 @@ def concat_ws(separator: str, *args: Expr) -> Expr: ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. """ - args_inner = [arg.expr for arg in args] - return Expr(f.concat_ws(separator, args_inner)) + args = [arg.expr for arg in args] + return Expr(f.concat_ws(separator, args)) def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: @@ -428,23 +428,12 @@ def window( df.select(functions.lag(col("a")).partition_by(col("b")).build()) """ - args_inner = [a.expr for a in args] - partition_by_inner = expr_list_to_raw_expr_list(partition_by) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) - window_frame_inner = window_frame.window_frame if window_frame is not None else None - ctx_inner = ctx.ctx if ctx is not None else None - return Expr( - f.window( - name, - args_inner, - partition_by_inner, - order_by_raw, - window_frame_inner, - ctx_inner, - ) - ) + args = [a.expr for a in args] + partition_by = expr_list_to_raw_expr_list(partition_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) + window_frame = window_frame.window_frame if window_frame is not None else None + ctx = ctx.ctx if ctx is not None else None + return Expr(f.window(name, args, partition_by, order_by_raw, window_frame, ctx)) # scalar functions @@ -547,8 +536,8 @@ def chr(arg: Expr) -> Expr: def coalesce(*args: Expr) -> Expr: """Returns the value of the first expr in ``args`` which is not NULL.""" - args_inner = [arg.expr for arg in args] - return Expr(f.coalesce(*args_inner)) + args = [arg.expr for arg in args] + return Expr(f.coalesce(*args)) def cos(arg: Expr) -> Expr: @@ -756,10 +745,8 @@ def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: false otherwise. """ if flags is not None: - flags_inner = flags.expr - else: - flags_inner = None - return Expr(f.regexp_like(string.expr, regex.expr, flags_inner)) + flags = flags.expr + return Expr(f.regexp_like(string.expr, regex.expr, flags)) def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: @@ -769,10 +756,8 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: corresponding index in ``regex`` to string in ``string``. """ if flags is not None: - flags_inner = flags.expr - else: - flags_inner = None - return Expr(f.regexp_match(string.expr, regex.expr, flags_inner)) + flags = flags.expr + return Expr(f.regexp_match(string.expr, regex.expr, flags)) def regexp_replace( @@ -787,12 +772,8 @@ def regexp_replace( """ if flags is not None: - flags_inner = flags.expr - else: - flags_inner = None - return Expr( - f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags_inner) - ) + flags = flags.expr + return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) def repeat(string: Expr, n: Expr) -> Expr: @@ -951,8 +932,8 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: if formatters is None: return f.to_timestamp(arg.expr) - formatters_inner = [f.expr for f in formatters] - return Expr(f.to_timestamp(arg.expr, *formatters_inner)) + formatters = [f.expr for f in formatters] + return Expr(f.to_timestamp(arg.expr, *formatters)) def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: @@ -960,8 +941,8 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters_inner = [f.expr for f in formatters] - return Expr(f.to_timestamp_millis(arg.expr, *formatters_inner)) + formatters = [f.expr for f in formatters] + return Expr(f.to_timestamp_millis(arg.expr, *formatters)) def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: @@ -969,8 +950,17 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters_inner = [f.expr for f in formatters] - return Expr(f.to_timestamp_micros(arg.expr, *formatters_inner)) + formatters = [f.expr for f in formatters] + return Expr(f.to_timestamp_micros(arg.expr, *formatters)) + + +def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: + """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + """ + formatters = [f.expr for f in formatters] + return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: @@ -978,8 +968,8 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters_inner = [f.expr for f in formatters] - return Expr(f.to_timestamp_seconds(arg.expr, *formatters_inner)) + formatters = [f.expr for f in formatters] + return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr: @@ -1066,8 +1056,8 @@ def upper(arg: Expr) -> Expr: def make_array(*args: Expr) -> Expr: """Returns an array using the specified input expressions.""" - args_inner = [arg.expr for arg in args] - return Expr(f.make_array(args_inner)) + args = [arg.expr for arg in args] + return Expr(f.make_array(args)) def make_list(*args: Expr) -> Expr: @@ -1098,8 +1088,8 @@ def uuid() -> Expr: def struct(*args: Expr) -> Expr: """Returns a struct with the given arguments.""" - args_inner = [arg.expr for arg in args] - return Expr(f.struct(*args_inner)) + args = [arg.expr for arg in args] + return Expr(f.struct(*args)) def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: @@ -1110,8 +1100,8 @@ def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: ] # flatten - name_pairs_inner = [x.expr for xs in name_pair_exprs for x in xs] - return Expr(f.named_struct(*name_pairs_inner)) + name_pairs = [x.expr for xs in name_pair_exprs for x in xs] + return Expr(f.named_struct(*name_pairs)) def from_unixtime(arg: Expr) -> Expr: @@ -1165,8 +1155,8 @@ def list_push_back(array: Expr, element: Expr) -> Expr: def array_concat(*args: Expr) -> Expr: """Concatenates the input arrays.""" - args_inner = [arg.expr for arg in args] - return Expr(f.array_concat(args_inner)) + args = [arg.expr for arg in args] + return Expr(f.array_concat(args)) def array_cat(*args: Expr) -> Expr: @@ -1510,10 +1500,8 @@ def array_slice( ) -> Expr: """Returns a slice of the array.""" if stride is not None: - stride_inner = stride.expr - else: - stride_inner = None - return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride_inner)) + stride = stride.expr + return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride)) def list_slice(array: Expr, begin: Expr, end: Expr, stride: Expr | None = None) -> Expr: @@ -1723,9 +1711,7 @@ def array_agg( filter: If provided, only compute against rows for which the filter is True order_by: Order the resultant array values """ - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2223,9 +2209,7 @@ def first_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2257,9 +2241,7 @@ def last_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2293,9 +2275,7 @@ def nth_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignore null values. """ - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2436,9 +2416,7 @@ def lead( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.lead( @@ -2490,9 +2468,7 @@ def lag( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.lag( @@ -2531,9 +2507,7 @@ def row_number( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.row_number( @@ -2574,9 +2548,7 @@ def rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.rank( @@ -2612,9 +2584,7 @@ def dense_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.dense_rank( @@ -2651,9 +2621,7 @@ def percent_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.percent_rank( @@ -2690,9 +2658,7 @@ def cume_dist( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.cume_dist( @@ -2733,9 +2699,7 @@ def ntile( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.ntile( @@ -2767,9 +2731,7 @@ def string_agg( filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate """ - order_by_raw = ( - sort_list_to_raw_sort_list(order_by) if order_by is not None else None - ) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 566676e4e..c4604a01c 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -270,12 +270,12 @@ def sum_bias_10() -> Summarize: """ # noqa W505 if not callable(accum): raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): # type: ignore + if not isinstance(accum.__call__(), Accumulator): raise TypeError( "Accumulator must implement the abstract base class Accumulator" ) if name is None: - name = accum.__call__().__class__.__qualname__.lower() # type: ignore + name = accum.__call__().__class__.__qualname__.lower() assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] @@ -533,13 +533,12 @@ def bias_10() -> BiasedNumbers: """ # noqa W505 if not callable(func): raise TypeError("`func` must be callable.") - if not isinstance(func.__call__(), WindowEvaluator): # type: ignore + if not isinstance(func.__call__(), WindowEvaluator): raise TypeError( "`func` must implement the abstract base class WindowEvaluator" ) if name is None: - name = func.__call__().__class__.__qualname__.lower() # type: ignore - assert name is not None + name = func.__call__().__class__.__qualname__.lower() if isinstance(input_types, pyarrow.DataType): input_types = [input_types] return WindowUDF( diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 01c6c9cef..7beef516b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -873,6 +873,18 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_seconds( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_millis( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_micros( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), ) result = df.collect() assert len(result) == 1 @@ -911,7 +923,18 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) - + assert result.column(11) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) + assert result.column(12) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + ) + assert result.column(13) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + ) + assert result.column(14) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + ) def test_arrow_cast(df): df = df.select( From 2c8ea032bb960294acad424be2a6d7d373e8741c Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 13 Feb 2025 20:27:28 +0800 Subject: [PATCH 7/9] format --- python/tests/test_functions.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 7beef516b..977745fb8 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -790,9 +790,9 @@ def test_hash_functions(df): ) assert result.column(2) == pa.array( [ - b("185F8DB32271FE25F561A6FC938B2E26" "4306EC304EDA518007D1764826381969"), - b("78AE647DC5544D227130A0682A51E30B" "C7777FBB6D8A8F17007463A3ECD1D524"), - b("BB7208BC9B5D7C04F1236A82A0093A5E" "33F40423D5BA8D4266F7092C3BA43B62"), + b("185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"), + b("78AE647DC5544D227130A0682A51E30BC7777FBB6D8A8F17007463A3ECD1D524"), + b("BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"), ] ) assert result.column(3) == pa.array( @@ -838,16 +838,16 @@ def test_hash_functions(df): ) assert result.column(5) == pa.array( [ - b("F73A5FBF881F89B814871F46E26AD3FA" "37CB2921C5E8561618639015B3CCBB71"), - b("B792A0383FB9E7A189EC150686579532" "854E44B71AC394831DAED169BA85CCC5"), - b("27988A0E51812297C77A433F63523334" "6AEE29A829DCF4F46E0F58F402C6CFCB"), + b("F73A5FBF881F89B814871F46E26AD3FA37CB2921C5E8561618639015B3CCBB71"), + b("B792A0383FB9E7A189EC150686579532854E44B71AC394831DAED169BA85CCC5"), + b("27988A0E51812297C77A433F635233346AEE29A829DCF4F46E0F58F402C6CFCB"), ] ) assert result.column(6) == pa.array( [ - b("FBC2B0516EE8744D293B980779178A35" "08850FDCFE965985782C39601B65794F"), - b("BF73D18575A736E4037D45F9E316085B" "86C19BE6363DE6AA789E13DEAACC1C4E"), - b("C8D11B9F7237E4034ADBCD2005735F9B" "C4C597C75AD89F4492BEC8F77D15F7EB"), + b("FBC2B0516EE8744D293B980779178A3508850FDCFE965985782C39601B65794F"), + b("BF73D18575A736E4037D45F9E316085B86C19BE6363DE6AA789E13DEAACC1C4E"), + b("C8D11B9F7237E4034ADBCD2005735F9BC4C597C75AD89F4492BEC8F77D15F7EB"), ] ) assert result.column(7) == result.column(1) # SHA-224 @@ -936,6 +936,7 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) + def test_arrow_cast(df): df = df.select( # we use `string_literal` to return utf8 instead of `literal` which returns From 0d6cd0744714f33cec9a699acf35d0e0f9c6639b Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 16 Feb 2025 11:29:41 +0800 Subject: [PATCH 8/9] format --- python/tests/test_functions.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index b682a733f..fca05bb8f 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -884,6 +884,9 @@ def test_temporal_functions(df): literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") ), f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), + f.to_timestamp_nanos( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), ) result = df.collect() assert len(result) == 1 @@ -934,6 +937,12 @@ def test_temporal_functions(df): assert result.column(14) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) + assert result.column(15) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) + assert result.column(16) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): From d771d3bf65889e49a47d59d5f1bb09f12a4286cf Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sat, 22 Feb 2025 08:52:19 +0800 Subject: [PATCH 9/9] update --- python/datafusion/context.py | 2 +- python/datafusion/udf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 7110ecce9..0d65b61d2 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -783,7 +783,7 @@ def register_parquet( file_extension, skip_metadata, schema, - [[expr.raw_sort for expr in exprs] for exprs in file_sort_order] + [sort_list_to_raw_sort_list(exprs) for exprs in file_sort_order] if file_sort_order is not None else None, ) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 911721cc0..0bba3d723 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -158,7 +158,7 @@ def state(self) -> List[pyarrow.Scalar]: pass @abstractmethod - def update(self, values: pyarrow.Array) -> None: + def update(self, *values: pyarrow.Array) -> None: """Evaluate an array of values and update state.""" pass