Skip to content

Commit fbad38a

Browse files
docs: Add example for SchemaClass.filter() to docstring (#188)
Co-authored-by: Oliver Borchert <oliver.borchert@quantco.com>
1 parent 094ad24 commit fbad38a

File tree

2 files changed: 35 additions and 1 deletion.

dataframely/collection/collection.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,25 @@ def filter(
531531
Raises:
532532
ValueError: If an insufficient set of input data frames is provided, i.e. if
533533
any required member of this collection is missing in the input.
534+
535+
Example:
536+
537+
.. code-block:: python
538+
539+
# Define collection
540+
class HospitalInvoiceData(dy.Collection):
541+
invoice: dy.LazyFrame[InvoiceSchema]
542+
...
543+
544+
# Filter the data and cast columns to expected types
545+
good, failure = HospitalInvoiceData.filter(df, cast=True)
546+
547+
# Inspect the reasons for the failed rows for member `invoice`
548+
print(failure.invoice.counts())
549+
550+
# Inspect the failed rows
551+
failed_df = failure.invoice.invalid()
552+
print(failed_df)
534553
"""
535554
cls._validate_input_keys(data)
536555

dataframely/schema.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -637,7 +637,8 @@ def filter(
637637
cast: bool = False,
638638
eager: bool = True,
639639
) -> FilterResult[Self] | LazyFilterResult[Self]:
640-
"""Filter the data frame by the rules of this schema.
640+
"""Filter the data frame by the rules of this schema, returning `(valid,
641+
failures)`.
641642
642643
This method can be thought of as a "soft alternative" to :meth:`validate`.
643644
While :meth:`validate` raises an exception when a row does not adhere to the
@@ -670,6 +671,20 @@ def filter(
670671
671672
Note:
672673
This method preserves the ordering of the input data frame.
674+
675+
Example:
676+
677+
.. code-block:: python
678+
679+
# Filter the data and cast columns to expected types
680+
good, failure = HouseSchema.filter(df, cast=True)
681+
682+
# Inspect the reasons for the failed rows
683+
print(failure.counts())
684+
685+
# Inspect the failed rows
686+
failed_df = failure.invalid()
687+
print(failed_df)
673688
"""
674689
lf = df.lazy().pipe(
675690
match_to_schema, cls, casting=("lenient" if cast else "none")

0 commit comments

Comments
 (0)