From a8813bc97780bde8aa4765bc31f4181e34f218d9 Mon Sep 17 00:00:00 2001
From: Yurii Mazurevich
Date: Thu, 6 Nov 2025 22:00:31 +0000
Subject: [PATCH] Update string.py to use len_chars() instead of len_bytes()

It's really awkward to use bytes for unicode characters
---
 dataframely/columns/string.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dataframely/columns/string.py b/dataframely/columns/string.py
index fadb4f2..f558485 100644
--- a/dataframely/columns/string.py
+++ b/dataframely/columns/string.py
@@ -78,9 +78,9 @@ def dtype(self) -> pl.DataType:
     def validation_rules(self, expr: pl.Expr) -> dict[str, pl.Expr]:
         result = super().validation_rules(expr)
         if self.min_length is not None:
-            result["min_length"] = expr.str.len_bytes() >= self.min_length
+            result["min_length"] = expr.str.len_chars() >= self.min_length
         if self.max_length is not None:
-            result["max_length"] = expr.str.len_bytes() <= self.max_length
+            result["max_length"] = expr.str.len_chars() <= self.max_length
         if self.regex is not None:
             result["regex"] = expr.str.contains(self.regex)
         return result