Merge pull request #160 from dreadnode/fix/optional-text-requirements

briangreunke · web-flow · commit 8d42d4aae155 · 2025-09-06T11:40:17.000-05:00
fix: text extras import errors
diff --git a/dreadnode/scorers/classification.py b/dreadnode/scorers/classification.py
@@ -1,11 +1,9 @@
 import typing as t
 
-from transformers import pipeline
-
 from dreadnode.meta import Config
 from dreadnode.metric import Metric
 from dreadnode.scorers import Scorer
-from dreadnode.util import clean_str, warn_at_user_stacklevel
+from dreadnode.util import clean_str, generate_import_error_msg, warn_at_user_stacklevel
 
 # Global cache for pipelines
 g_transformer_pipeline_cache: dict[str, t.Any] = {}
@@ -32,12 +30,10 @@ def zero_shot_classification(
         model_name: The name of the zero-shot model from Hugging Face Hub.
         name: Name of the scorer.
     """
-    transformers_error_msg = (
-        "Transformers dependency is not installed. Install with: pip install transformers"
-    )
+    transformers_error_msg = generate_import_error_msg("transformers", "training")
 
     try:
-        pipeline("zero-shot-classification", model=model_name)
+        from transformers import pipeline  # type: ignore[import-not-found]
     except ImportError:
         warn_at_user_stacklevel(transformers_error_msg, UserWarning)
 
diff --git a/dreadnode/scorers/pii.py b/dreadnode/scorers/pii.py
@@ -1,15 +1,14 @@
 import re
 import typing as t
 
-from presidio_analyzer import AnalyzerEngine
-from presidio_analyzer.nlp_engine import NlpEngineProvider
-
 from dreadnode.metric import Metric
 from dreadnode.scorers import Scorer
 from dreadnode.scorers.contains import contains
-from dreadnode.util import warn_at_user_stacklevel
+from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel
 
 if t.TYPE_CHECKING:
+    from presidio_analyzer import AnalyzerEngine  # type: ignore[import-not-found]
+
     from dreadnode.types import JsonDict
 
 
@@ -66,6 +65,9 @@ def _get_presidio_analyzer() -> "AnalyzerEngine":
     """Lazily initializes and returns a singleton Presidio AnalyzerEngine instance."""
     global g_analyzer_engine  # noqa: PLW0603
 
+    from presidio_analyzer import AnalyzerEngine  # type: ignore[import-not-found]
+    from presidio_analyzer.nlp_engine import NlpEngineProvider  # type: ignore[import-not-found]
+
     if g_analyzer_engine is None:
         provider = NlpEngineProvider(
             nlp_configuration={
@@ -101,14 +103,11 @@ def detect_pii_with_presidio(
         invert: Invert the score (1.0 for no PII, 0.0 for PII detected).
         name: Name of the scorer.
     """
-    presidio_import_error_msg = (
-        "Presidio dependencies are not installed. "
-        "Install with: pip install presidio-analyzer presidio-anonymizer 'spacy[en_core_web_lg]'"
-    )
+    presidio_import_error_msg = generate_import_error_msg("presidio-analyzer", "text")
 
     try:
-        _get_presidio_analyzer()
-    except (ImportError, OSError):
+        import presidio_analyzer  # type: ignore[import-not-found]
+    except ImportError:
         warn_at_user_stacklevel(presidio_import_error_msg, UserWarning)
 
         def disabled_evaluate(_: t.Any) -> Metric:
diff --git a/dreadnode/scorers/readability.py b/dreadnode/scorers/readability.py
@@ -1,7 +1,5 @@
 import typing as t
 
-import textstat  # type: ignore[import-untyped]
-
 from dreadnode.metric import Metric
 from dreadnode.scorers.base import Scorer
 from dreadnode.util import warn_at_user_stacklevel
@@ -29,8 +27,8 @@ def readability(
     )
 
     try:
-        textstat.flesch_kincaid_grade("test")
-    except (ImportError, AttributeError):
+        import textstat  # type: ignore[import-not-found]
+    except ImportError:
         warn_at_user_stacklevel(textstat_import_error_msg, UserWarning)
 
         def disabled_evaluate(_: t.Any) -> Metric:
diff --git a/dreadnode/scorers/sentiment.py b/dreadnode/scorers/sentiment.py
@@ -2,12 +2,11 @@
 import typing as t
 
 import httpx
-from textblob import TextBlob  # type: ignore[import-untyped]
 
 from dreadnode.meta import Config
 from dreadnode.metric import Metric
 from dreadnode.scorers.base import Scorer
-from dreadnode.util import warn_at_user_stacklevel
+from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel
 
 Sentiment = t.Literal["positive", "negative", "neutral"]
 
@@ -30,11 +29,11 @@ def sentiment(
         target: The desired sentiment to score against.
         name: Name of the scorer.
     """
-    textblob_import_error_msg = "TextBlob dependency is not installed. Install with: pip install textblob && python -m textblob.download_corpora"
+    textblob_import_error_msg = generate_import_error_msg("textblob", "text")
 
     try:
-        TextBlob("test").sentiment  # noqa: B018
-    except (ImportError, AttributeError):
+        from textblob import TextBlob  # type: ignore[import-not-found]
+    except ImportError:
         warn_at_user_stacklevel(textblob_import_error_msg, UserWarning)
 
         def disabled_evaluate(_: t.Any) -> Metric:
diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py
@@ -1,22 +1,16 @@
 import typing as t
 from difflib import SequenceMatcher
 
-import litellm
-import nltk  # type: ignore[import-untyped]
-from nltk.tokenize import word_tokenize  # type: ignore[import-untyped]
-from nltk.translate.bleu_score import sentence_bleu  # type: ignore[import-untyped]
-from rapidfuzz import distance, fuzz, utils
-from sentence_transformers import SentenceTransformer, util
-from sklearn.feature_extraction.text import TfidfVectorizer  # type: ignore[import-untyped]
-from sklearn.metrics.pairwise import (  # type: ignore  # noqa: PGH003
-    cosine_similarity as sklearn_cosine_similarity,
-)
-
 from dreadnode.meta import Config
 from dreadnode.metric import Metric
 from dreadnode.scorers.base import Scorer
 from dreadnode.scorers.util import cosine_similarity
-from dreadnode.util import warn_at_user_stacklevel
+from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel
+
+if t.TYPE_CHECKING:
+    from sentence_transformers import (  # type: ignore[import-not-found]
+        SentenceTransformer,
+    )
 
 
 def similarity(
@@ -94,12 +88,9 @@ def similarity_with_rapidfuzz(
         score_cutoff: Optional score cutoff below which to return 0.0.
         name: Name of the scorer.
     """
-    rapidfuzz_import_error_msg = (
-        "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz"
-    )
-
+    rapidfuzz_import_error_msg = generate_import_error_msg("rapidfuzz", "text")
     try:
-        fuzz.ratio("test", "test")
+        from rapidfuzz import fuzz, utils  # type: ignore[import-not-found]
     except ImportError:
         warn_at_user_stacklevel(rapidfuzz_import_error_msg, UserWarning)
 
@@ -191,11 +182,11 @@ def string_distance(
         normalize: Normalize distances and convert to similarity scores.
         name: Name of the scorer.
     """
-    rapidfuzz_import_error_msg = (
-        "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz"
-    )
+    rapidfuzz_import_error_msg = generate_import_error_msg("rapidfuzz", "text")
 
     try:
+        from rapidfuzz import distance  # type: ignore[import-not-found]
+
         distance.Levenshtein.distance("test", "test")
     except ImportError:
         warn_at_user_stacklevel(rapidfuzz_import_error_msg, UserWarning)
@@ -260,12 +251,15 @@ def similarity_with_tf_idf(reference: str, *, name: str = "similarity") -> "Scor
         reference: The reference text (e.g., expected output).
         name: Name of the scorer.
     """
-    sklearn_import_error_msg = (
-        "scikit-learn dependency is not installed. Please install it with: pip install scikit-learn"
-    )
+    sklearn_import_error_msg = generate_import_error_msg("scikit-learn", "text")
 
     try:
-        TfidfVectorizer()
+        from sklearn.feature_extraction.text import (  # type: ignore[import-not-found]
+            TfidfVectorizer,
+        )
+        from sklearn.metrics.pairwise import (  # type: ignore[import-not-found]
+            cosine_similarity as sklearn_cosine_similarity,
+        )
     except ImportError:
         warn_at_user_stacklevel(sklearn_import_error_msg, UserWarning)
 
@@ -309,10 +303,13 @@ def similarity_with_sentence_transformers(
         model_name: The name of the sentence-transformer model to use.
         name: Name of the scorer.
     """
-    sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers"
+    sentence_transformers_error_msg = generate_import_error_msg("sentence-transformers", "training")
 
     try:
-        SentenceTransformer(model_name)
+        from sentence_transformers import (  # type: ignore[import-not-found]
+            SentenceTransformer,
+            util,
+        )
     except ImportError:
         warn_at_user_stacklevel(sentence_transformers_error_msg, UserWarning)
 
@@ -370,6 +367,16 @@ def similarity_with_litellm(
                   or self-hosted models.
         name: Name of the scorer.
     """
+    litellm_import_error_msg = generate_import_error_msg("litellm", "text")
+    try:
+        import litellm
+    except ImportError:
+        warn_at_user_stacklevel(litellm_import_error_msg, UserWarning)
+
+        def disabled_evaluate(_: t.Any) -> Metric:
+            return Metric(value=0.0, attributes={"error": litellm_import_error_msg})
+
+        return Scorer(disabled_evaluate, name=name)
 
     async def evaluate(
         data: t.Any,
@@ -423,14 +430,19 @@ def bleu(
         weights: Weights for unigram, bigram, etc. Must sum to 1.
         name: Name of the scorer.
     """
-    nltk_import_error_msg = "NLTK dependency is not installed. Install with: pip install nltk && python -m nltk.downloader punkt"
+    nltk_import_error_msg = generate_import_error_msg("nltk", "text")
 
     try:
-        # Check for the 'punkt' tokenizer data
+        import nltk  # type: ignore[import-not-found]
+        from nltk.tokenize import (  # type: ignore[import-not-found]
+            word_tokenize,
+        )
+        from nltk.translate.bleu_score import (  # type: ignore[import-not-found]
+            sentence_bleu,
+        )
+
         try:
             nltk.data.find("tokenizers/punkt")
-            word_tokenize("test")
-            sentence_bleu([["test"]], ["test"])
         except LookupError as e:
             nltk_import_error_msg = (
                 "NLTK 'punkt' tokenizer not found. Please run: python -m nltk.downloader punkt"
diff --git a/dreadnode/transforms/ascii_art.py b/dreadnode/transforms/ascii_art.py
@@ -1,5 +1,3 @@
-from art import text2art  # type: ignore[import-untyped]
-
 from dreadnode.meta import Config
 from dreadnode.transforms.base import Transform
 
@@ -8,8 +6,8 @@ def ascii_art(font: str = "rand", *, name: str = "ascii_art") -> Transform[str,
     """Converts text into ASCII art using the 'art' library."""
 
     try:
-        text2art("test")  # Test if art is working
-    except (ImportError, AttributeError):
+        from art import text2art  # type: ignore[import-not-found]
+    except ImportError:
         raise ImportError(
             "ASCII art dependency is not installed. Install with: pip install art"
         ) from ImportError("art library not available")
diff --git a/dreadnode/transforms/perturbation.py b/dreadnode/transforms/perturbation.py
@@ -3,8 +3,6 @@
 import typing as t
 import unicodedata
 
-from confusables import confusable_characters  # type: ignore[import-untyped]
-
 from dreadnode.meta import Config
 from dreadnode.transforms.base import Transform
 
@@ -226,8 +224,10 @@ def unicode_confusable(
     """
 
     try:
-        confusable_characters("a")
-    except (ImportError, AttributeError):
+        from confusables import (  # type: ignore[import-not-found]
+            confusable_characters,
+        )
+    except ImportError:
         raise ImportError(
             "Confusables dependency is not installed. Install with: pip install confusables"
         ) from ImportError("confusables library not available")
diff --git a/dreadnode/util.py b/dreadnode/util.py
@@ -154,6 +154,13 @@ def format_dict(data: dict[str, t.Any], max_length: int = 80) -> str:
     return f"{{{formatted}}}"
 
 
+def generate_import_error_msg(package_name: str, extras_name: str) -> str:
+    return (
+        f"Missing required package '{package_name}'. "
+        f"Please install it with: pip install {package_name} or dreadnode[{extras_name}]"
+    )
+
+
 # Types
 
 
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml