Merge pull request #682 from datafold/LAB-135

dlawin · web-flow · commit 40e62dc430ca · 2023-08-22T15:02:16.000-06:00
partial --select support for dbt < 1.5
diff --git a/data_diff/__main__.py b/data_diff/__main__.py
@@ -253,8 +253,8 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
     "--select",
     "-s",
     default=None,
-    metavar="PATH",
-    help="select dbt resources to compare using dbt selection syntax.",
+    metavar="SELECTION or MODEL_NAME",
+    help="--select dbt resources to compare using dbt selection syntax in dbt versions >= 1.5.\nIn versions < 1.5, it will naively search for a model with MODEL_NAME as the name.",
 )
 @click.option(
     "--state",
diff --git a/data_diff/dbt_parser.py b/data_diff/dbt_parser.py
@@ -20,8 +20,8 @@
     DataDiffDbtRunResultsVersionError,
     DataDiffDbtSelectNoMatchingModelsError,
     DataDiffDbtSelectUnexpectedError,
-    DataDiffDbtSelectVersionTooLowError,
     DataDiffDbtSnowflakeSetConnectionError,
+    DataDiffSimpleSelectNotFound,
 )
 
 from .utils import getLogger, get_from_dict_with_raise
@@ -167,9 +167,11 @@ def get_models(self, dbt_selection: Optional[str] = None):
                         "data-diff is using a dbt-core version < 1.5, update the environment's dbt-core version via pip install 'dbt-core>=1.5' in order to use `--select`"
                     )
             else:
-                raise DataDiffDbtSelectVersionTooLowError(
-                    f"The `--select` feature requires dbt >= 1.5, but your project's manifest.json is from dbt v{dbt_version}. Please follow these steps to use the `--select` feature: \n 1. Update your dbt-core version via pip install 'dbt-core>=1.5'. Details: https://docs.getdbt.com/docs/core/pip-install#change-dbt-core-versions \n 2. Execute any `dbt` command (`run`, `compile`, `build`) to create a new manifest.json."
+                # Naively get node named <dbt_selection>
+                logger.warning(
+                    f"Full `--select` support requires dbt >= 1.5. Naively searching for a single model with name: '{dbt_selection}'."
                 )
+                return self.get_simple_model_selection(dbt_selection)
         else:
             return self.get_run_results_models()
 
@@ -209,6 +211,25 @@ def get_dbt_selection_models(self, dbt_selection: str) -> List[str]:
         logger.debug(str(results))
         raise DataDiffDbtSelectUnexpectedError("Encountered an unexpected error while finding `--select` models")
 
+    def get_simple_model_selection(self, dbt_selection: str):
+        model_nodes = dict(filter(lambda item: item[0].startswith("model."), self.dev_manifest_obj.nodes.items()))
+
+        model_unique_key_list = [k for k, v in model_nodes.items() if v.name == dbt_selection]
+
+        # name *should* always be unique, but just in case:
+        if len(model_unique_key_list) > 1:
+            logger.warning(
+                f"Found more than one model with name '{dbt_selection}' {model_unique_key_list}, using the first one."
+            )
+        elif len(model_unique_key_list) < 1:
+            raise DataDiffSimpleSelectNotFound(
+                f"Did not find a model node with name '{dbt_selection}' in the manifest."
+            )
+
+        model = model_nodes.get(model_unique_key_list[0])
+
+        return [model]
+
     def get_run_results_models(self):
         with open(self.project_dir / RUN_RESULTS_PATH) as run_results:
             logger.info(f"Parsing file {RUN_RESULTS_PATH}")
diff --git a/data_diff/errors.py b/data_diff/errors.py
@@ -42,10 +42,6 @@ class DataDiffDbtCoreNoRunnerError(Exception):
     "Raised when the manifest version >= 1.5, but the dbt-core package is < 1.5. This is an edge case most likely to occur in development."
 
 
-class DataDiffDbtSelectVersionTooLowError(Exception):
-    "Raised when attempting to use `--select` with a dbt-core version < 1.5."
-
-
 class DataDiffCustomSchemaNoConfigError(Exception):
     "Raised when a model has a custom schema, but there is no prod_custom_schema config. (And not using --state)."
 
@@ -68,3 +64,7 @@ class DataDiffCloudDiffFailed(Exception):
 
 class DataDiffCloudDiffTimedOut(Exception):
     "Raised when using --cloud and the diff did not return finish before the timeout value."
+
+
+class DataDiffSimpleSelectNotFound(Exception):
+    "Raised when using --select on dbt < 1.5 and a model node is not found in the manifest."
diff --git a/tests/test_dbt_parser.py b/tests/test_dbt_parser.py
@@ -10,7 +10,6 @@
     DataDiffDbtProfileNotFoundError,
     DataDiffDbtRedshiftPasswordOnlyError,
     DataDiffDbtRunResultsVersionError,
-    DataDiffDbtSelectVersionTooLowError,
     DataDiffDbtSnowflakeSetConnectionError,
 )
 
@@ -56,17 +55,18 @@ def test_get_models(self):
         mock_self.get_dbt_selection_models.assert_called_once_with(selection)
         self.assertEqual(models, mock_return_value)
 
-    def test_get_models_unsupported_manifest_version(self):
+    def test_get_models_simple_select(self):
         mock_self = Mock()
         mock_self.project_dir = Path()
         mock_self.dbt_version = "1.4.0"
         selection = "model+"
         mock_return_value = Mock()
-        mock_self.get_dbt_selection_models.return_value = mock_return_value
+        mock_self.get_simple_model_selection.return_value = mock_return_value
 
-        with self.assertRaises(DataDiffDbtSelectVersionTooLowError):
-            _ = DbtParser.get_models(mock_self, selection)
+        models = DbtParser.get_models(mock_self, selection)
         mock_self.get_dbt_selection_models.assert_not_called()
+        mock_self.get_simple_model_selection.assert_called_with(selection)
+        self.assertEqual(models, mock_return_value)
 
     def test_get_models_no_runner(self):
         mock_self = Mock()