
Commit d463a6c

jaceklaskowski authored and zifeif2 committed
[SPARK-54418][SDP] Fix error messages and code formatting
### What changes were proposed in this pull request?
Fixes typos and cleans up code formatting (that could've been automated, but done manually this time).

https://issues.apache.org/jira/browse/SPARK-54418

### Why are the changes needed?
Cleaner code with fewer typos.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Built locally. Waiting for the official build to finish once the PR is created.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#52538 from jaceklaskowski/sdp-typo-hunting-formatting.

Authored-by: Jacek Laskowski <jacek@japila.pl>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent 58ea081 · commit d463a6c

File tree

27 files changed: +171 -167 lines


common/utils/src/main/resources/error/error-conditions.json

Lines changed: 1 addition & 1 deletion
@@ -5253,7 +5253,7 @@
     "RUN_EMPTY_PIPELINE" : {
       "message" : [
         "Pipelines are expected to have at least one non-temporary dataset defined (tables, persisted views) but no non-temporary datasets were found in your pipeline.",
-        "Please verify that you have included the expected source files, and that your source code includes table definitions (e.g., CREATE MATERIALIZED VIEW in SQL code, @sdp.table in python code)."
+        "Please verify that you have included the expected source files, and that your source code includes table definitions (e.g., CREATE MATERIALIZED VIEW in SQL code, @dp.table in python code)."
       ],
       "sqlState" : "42617"
     },

core/src/main/scala/org/apache/spark/deploy/SparkPipelines.scala

Lines changed: 4 additions & 4 deletions
@@ -17,7 +17,7 @@
 
 package org.apache.spark.deploy
 
-import java.util
+import java.util.{Arrays => JArrays, List => JList}
 import java.util.Locale
 
 import scala.collection.mutable.ArrayBuffer
@@ -46,7 +46,7 @@ object SparkPipelines extends Logging {
       pipelinesCliFile: String,
       args: Array[String]): Seq[String] = {
     val (sparkSubmitArgs, pipelinesArgs) = splitArgs(args)
-    (sparkSubmitArgs ++ Seq(pipelinesCliFile) ++ pipelinesArgs)
+    sparkSubmitArgs ++ Seq(pipelinesCliFile) ++ pipelinesArgs
   }
 
   /**
@@ -59,7 +59,7 @@ object SparkPipelines extends Logging {
     var remote = "local"
 
     new SparkSubmitArgumentsParser() {
-      parse(util.Arrays.asList(args: _*))
+      parse(JArrays.asList(args: _*))
 
       override protected def handle(opt: String, value: String): Boolean = {
         if (opt == "--remote") {
@@ -91,7 +91,7 @@ object SparkPipelines extends Logging {
         true
       }
 
-      override protected def handleExtraArgs(extra: util.List[String]): Unit = {
+      override protected def handleExtraArgs(extra: JList[String]): Unit = {
        pipelinesArgs.appendAll(extra.asScala)
      }
 
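Aside: the renamed-import idiom used in this hunk is plain Scala. Below is a minimal, self-contained sketch (not part of the patch; the object name is made up) showing how `java.util` types can be aliased at import time and bridged back to Scala collections with `asScala`, mirroring `JArrays.asList(...)` and `extra.asScala` above.

```scala
import java.util.{Arrays => JArrays, List => JList}

import scala.jdk.CollectionConverters._

// Hypothetical demo object; only the import aliases mirror the patch.
object RenamedImportsSketch {
  def main(args: Array[String]): Unit = {
    // JArrays.asList builds a java.util.List; the alias keeps it visually
    // distinct from Scala's own List.
    val javaList: JList[String] = JArrays.asList("--remote", "local", "run")
    // asScala bridges the Java list back into a Scala collection, as in
    // pipelinesArgs.appendAll(extra.asScala).
    val scalaSeq: Seq[String] = javaList.asScala.toSeq
    println(scalaSeq.mkString(" "))
  }
}
```
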

python/pyspark/pipelines/api.py

Lines changed: 2 additions & 2 deletions
@@ -470,12 +470,12 @@ def create_sink(
     options: Optional[Dict[str, str]] = None,
 ) -> None:
     """
-    Creates a sink that can be targeted by streaming flows, providing a generic destination \
+    Creates a sink that can be targeted by streaming flows, providing a generic destination
     for flows to send data external to the pipeline.
 
     :param name: The name of the sink.
     :param format: The format of the sink, e.g. "parquet".
-    :param options: A dict where the keys are the property names and the values are the \
+    :param options: A dict where the keys are the property names and the values are the
         property values. These properties will be set on the sink.
     """
     if type(name) is not str:

python/pyspark/pipelines/cli.py

Lines changed: 14 additions & 7 deletions
@@ -225,8 +225,8 @@ def register_definitions(
     dataflow_graph_id: str,
 ) -> None:
     """Register the graph element definitions in the pipeline spec with the given registry.
-    - Looks for Python files matching the glob patterns in the spec and imports them.
-    - Looks for SQL files matching the blob patterns in the spec and registers thems.
+    - Import Python files matching the glob patterns in the spec.
+    - Register SQL files matching the glob patterns in the spec.
     """
     path = spec_path.parent
     with change_dir(path):
@@ -356,8 +356,9 @@ def parse_table_list(value: str) -> List[str]:
     return [table.strip() for table in value.split(",") if table.strip()]
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Pipeline CLI")
+def main() -> None:
+    """The entry point of spark-pipelines CLI."""
+    parser = argparse.ArgumentParser(description="Pipelines CLI")
     subparsers = parser.add_subparsers(dest="command", required=True)
 
     # "run" subcommand
@@ -375,7 +376,9 @@ def parse_table_list(value: str) -> List[str]:
         default=[],
     )
     run_parser.add_argument(
-        "--full-refresh-all", action="store_true", help="Perform a full graph reset and recompute."
+        "--full-refresh-all",
+        action="store_true",
+        help="Perform a full graph reset and recompute.",
     )
     run_parser.add_argument(
         "--refresh",
@@ -395,7 +398,7 @@ def parse_table_list(value: str) -> List[str]:
     # "init" subcommand
     init_parser = subparsers.add_parser(
         "init",
-        help="Generates a simple pipeline project, including a spec file and example definitions.",
+        help="Generate a sample pipeline project, with a spec file and example transformations.",
     )
     init_parser.add_argument(
         "--name",
@@ -424,7 +427,7 @@ def parse_table_list(value: str) -> List[str]:
             full_refresh=args.full_refresh,
             full_refresh_all=args.full_refresh_all,
             refresh=args.refresh,
-            dry=args.command == "dry-run",
+            dry=False,
         )
     else:
         assert args.command == "dry-run"
@@ -437,3 +440,7 @@ def parse_table_list(value: str) -> List[str]:
         )
     elif args.command == "init":
         init(args.name)
+
+
+if __name__ == "__main__":
+    main()

python/pyspark/pipelines/spark_connect_pipeline.py

Lines changed: 3 additions & 2 deletions
@@ -29,7 +29,7 @@ def create_dataflow_graph(
     default_database: Optional[str],
     sql_conf: Optional[Mapping[str, str]],
 ) -> str:
-    """Create a dataflow graph in in the Spark Connect server.
+    """Create a dataflow graph in the Spark Connect server.
 
     :returns: The ID of the created dataflow graph.
     """
@@ -57,7 +57,7 @@ def handle_pipeline_events(iter: Iterator[Dict[str, Any]]) -> None:
             continue
         elif "pipeline_event_result" not in result.keys():
             raise PySparkValueError(
-                "Pipeline logs stream handler received an unexpected result: " f"{result}"
+                f"Pipeline logs stream handler received an unexpected result: {result}"
             )
         else:
             event = result["pipeline_event_result"].event
@@ -76,6 +76,7 @@ def start_run(
 ) -> Iterator[Dict[str, Any]]:
     """Start a run of the dataflow graph in the Spark Connect server.
 
+    :param spark: SparkSession.
     :param dataflow_graph_id: The ID of the dataflow graph to start.
     :param full_refresh: List of datasets to reset and recompute.
     :param full_refresh_all: Perform a full graph reset and recompute.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala

Lines changed: 3 additions & 3 deletions
@@ -546,7 +546,6 @@ case class CreateTableAsSelect(
  * The base command representation for a statement that can be part of a Declarative Pipeline to
  * define a pipeline dataset (MV or ST).
  */
-
 trait CreatePipelineDataset extends Command {
   // The name of the dataset.
   val name: LogicalPlan
@@ -567,7 +566,8 @@ trait CreatePipelineDataset {
 /**
  * An extension of the base command representation that represents a CTAS style CREATE statement.
  */
-trait CreatePipelineDatasetAsSelect extends BinaryCommand
+trait CreatePipelineDatasetAsSelect
+  extends BinaryCommand
   with CreatePipelineDataset
   with CTEInChildren {
 
@@ -2003,7 +2003,7 @@ case class Call(
  * representation of the matching SQL syntax and cannot be executed. Instead, it is interpreted by
 * the pipelines submodule during a pipeline execution
 *
- * @param name          the name of this flow
+ * @param name the name of this flow
 * @param flowOperation the logical plan of the actual transformation this flow should execute
 * @param comment an optional comment describing this flow
 */

sql/connect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/DataflowGraphRegistry.scala

Lines changed: 3 additions & 2 deletions
@@ -32,13 +32,14 @@ class DataflowGraphRegistry {
 
   private val dataflowGraphs = new ConcurrentHashMap[String, GraphRegistrationContext]()
 
-  /** Registers a DataflowGraph and generates a unique id to associate with the graph */
+  /**
+   * Registers a GraphRegistrationContext and generates a unique id to associate with the graph
+   */
   def createDataflowGraph(
       defaultCatalog: String,
       defaultDatabase: String,
       defaultSqlConf: Map[String, String]): String = {
     val graphId = java.util.UUID.randomUUID().toString
-    // TODO: propagate pipeline catalog and schema from pipeline spec here.
     dataflowGraphs.put(
       graphId,
       new GraphRegistrationContext(defaultCatalog, defaultDatabase, defaultSqlConf))
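To make the registry pattern above concrete, here is a small standalone sketch, not Spark code, of a ConcurrentHashMap-backed registry that hands out UUID keys; `GraphContext` is a hypothetical stand-in for `GraphRegistrationContext`.

```scala
import java.util.UUID
import java.util.concurrent.ConcurrentHashMap

// Hypothetical stand-in for GraphRegistrationContext.
final case class GraphContext(defaultCatalog: String, defaultDatabase: String)

class SimpleGraphRegistry {
  private val graphs = new ConcurrentHashMap[String, GraphContext]()

  /** Registers a context under a freshly generated id and returns that id. */
  def create(defaultCatalog: String, defaultDatabase: String): String = {
    val graphId = UUID.randomUUID().toString
    graphs.put(graphId, GraphContext(defaultCatalog, defaultDatabase))
    graphId
  }

  /** Looks up a previously registered context, if any. */
  def get(graphId: String): Option[GraphContext] = Option(graphs.get(graphId))
}
```
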

sql/connect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala

Lines changed: 21 additions & 23 deletions
@@ -49,8 +49,6 @@ private[connect] object PipelinesHandler extends Logging {
    *   Command to be handled
    * @param responseObserver
    *   The response observer where the response will be sent
-   * @param sparkSession
-   *   The spark session
    * @param transformRelationFunc
    *   Function used to convert a relation to a LogicalPlan. This is used when determining the
    *   LogicalPlan that a flow returns.
@@ -87,9 +85,7 @@
          defineOutput(cmd.getDefineOutput, sessionHolder)
        val identifierBuilder = ResolvedIdentifier.newBuilder()
        resolvedDataset.catalog.foreach(identifierBuilder.setCatalogName)
-        resolvedDataset.database.foreach { ns =>
-          identifierBuilder.addNamespace(ns)
-        }
+        resolvedDataset.database.foreach(identifierBuilder.addNamespace)
        identifierBuilder.setTableName(resolvedDataset.identifier)
        val identifier = identifierBuilder.build()
        PipelineCommandResult
@@ -116,7 +112,7 @@
          .setDefineFlowResult(
            PipelineCommandResult.DefineFlowResult
              .newBuilder()
-              .setResolvedIdentifier(identifierBuilder)
+              .setResolvedIdentifier(identifier)
              .build())
          .build()
      case proto.PipelineCommand.CommandTypeCase.START_RUN =>
@@ -181,15 +177,21 @@
    val defaultCatalog = Option
      .when(cmd.hasDefaultCatalog)(cmd.getDefaultCatalog)
      .getOrElse {
-        logInfo(s"No default catalog was supplied. Falling back to the current catalog.")
-        sessionHolder.session.catalog.currentCatalog()
+        val currentCatalog = sessionHolder.session.catalog.currentCatalog()
+        logInfo(
+          "No default catalog was supplied. " +
+            s"Falling back to the current catalog: $currentCatalog.")
+        currentCatalog
      }
 
    val defaultDatabase = Option
      .when(cmd.hasDefaultDatabase)(cmd.getDefaultDatabase)
      .getOrElse {
-        logInfo(s"No default database was supplied. Falling back to the current database.")
-        sessionHolder.session.catalog.currentDatabase
+        val currentDatabase = sessionHolder.session.catalog.currentDatabase
+        logInfo(
+          "No default database was supplied. " +
+            s"Falling back to the current database: $currentDatabase.")
+        currentDatabase
      }
 
    val defaultSqlConf = cmd.getSqlConfMap.asScala.toMap
@@ -280,18 +282,15 @@
              output.getSourceCodeLocation.getFileName),
            line = Option.when(output.getSourceCodeLocation.hasLineNumber)(
              output.getSourceCodeLocation.getLineNumber),
-            objectType = Option(QueryOriginType.View.toString),
+            objectType = Some(QueryOriginType.View.toString),
            objectName = Option(viewIdentifier.unquotedString),
-            language = Option(Python())),
+            language = Some(Python())),
          properties = Map.empty,
          sqlText = None))
        viewIdentifier
      case proto.OutputType.SINK =>
-        val dataflowGraphId = output.getDataflowGraphId
-        val graphElementRegistry =
-          sessionHolder.dataflowGraphRegistry.getDataflowGraphOrThrow(dataflowGraphId)
        val identifier = GraphIdentifierManager
-          .parseTableIdentifier(name = output.getOutputName, spark = sessionHolder.session)
+          .parseTableIdentifier(output.getOutputName, sessionHolder.session)
        val sinkDetails = output.getSinkDetails
        graphElementRegistry.registerSink(
          SinkImpl(
@@ -305,7 +304,7 @@
              output.getSourceCodeLocation.getLineNumber),
            objectType = Option(QueryOriginType.Sink.toString),
            objectName = Option(identifier.unquotedString),
-            language = Option(Python()))))
+            language = Some(Python()))))
        identifier
      case _ =>
        throw new IllegalArgumentException(s"Unknown output type: ${output.getOutputType}")
@@ -342,8 +341,7 @@
    val rawDestinationIdentifier = GraphIdentifierManager
      .parseTableIdentifier(name = flow.getTargetDatasetName, spark = sessionHolder.session)
    val flowWritesToView =
-      graphElementRegistry
-        .getViews()
+      graphElementRegistry.getViews
        .filter(_.isInstanceOf[TemporaryView])
        .exists(_.identifier == rawDestinationIdentifier)
    val flowWritesToSink =
@@ -353,7 +351,7 @@
    // If the flow is created implicitly as part of defining a view or that it writes to a sink,
    // then we do not qualify the flow identifier and the flow destination. This is because
    // views and sinks are not permitted to have multipart
-    val isImplicitFlowForTempView = (isImplicitFlow && flowWritesToView)
+    val isImplicitFlowForTempView = isImplicitFlow && flowWritesToView
    val Seq(flowIdentifier, destinationIdentifier) =
      Seq(rawFlowIdentifier, rawDestinationIdentifier).map { rawIdentifier =>
        if (isImplicitFlowForTempView || flowWritesToSink) {
@@ -370,7 +368,7 @@
 
    val relationFlowDetails = flow.getRelationFlowDetails
    graphElementRegistry.registerFlow(
-      new UnresolvedFlow(
+      UnresolvedFlow(
        identifier = flowIdentifier,
        destinationIdentifier = destinationIdentifier,
        func = FlowAnalysis.createFlowFunctionFromLogicalPlan(
@@ -383,9 +381,9 @@
          flow.getSourceCodeLocation.getFileName),
        line = Option.when(flow.getSourceCodeLocation.hasLineNumber)(
          flow.getSourceCodeLocation.getLineNumber),
-        objectType = Option(QueryOriginType.Flow.toString),
+        objectType = Some(QueryOriginType.Flow.toString),
        objectName = Option(flowIdentifier.unquotedString),
-        language = Option(Python()))))
+        language = Some(Python()))))
    flowIdentifier
  }
 
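The recurring `Option(...)` to `Some(...)` swaps above rest on a standard-library distinction worth spelling out. Here is a minimal sketch, not Spark code, of why `Some` suits values that can never be null and why `new` is redundant for case classes; `Flow` is a hypothetical stand-in for the query-origin fields of `UnresolvedFlow`.

```scala
object OptionVsSomeSketch {
  // Hypothetical stand-in with a synthesized companion apply, like UnresolvedFlow.
  case class Flow(identifier: String, objectType: Option[String], language: Option[String])

  def main(args: Array[String]): Unit = {
    // Option(x) is the null-tolerant constructor: it yields None for null input,
    // which is why it stays in place for values that might genuinely be absent.
    val possiblyNull: String = null
    assert(Option(possiblyNull).isEmpty)

    // Some(x) states the value is definitely present, matching literals such as
    // QueryOriginType.Flow.toString or Python() in the patch.
    val flow = Flow("my_flow", objectType = Some("Flow"), language = Some("Python"))

    // Case classes get a companion apply, so `new` is redundant:
    // Flow(...) and new Flow(...) build equal values.
    assert(flow == new Flow("my_flow", Some("Flow"), Some("Python")))
    println(flow)
  }
}
```
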

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 1 addition & 1 deletion
@@ -1385,7 +1385,7 @@ class SparkSqlAstBuilder extends AstBuilder {
 
    if (colConstraints.nonEmpty) {
      throw operationNotAllowed("Pipeline datasets do not currently support column constraints. " +
-        "Please remove and CHECK, UNIQUE, PK, and FK constraints specified on the pipeline " +
+        "Please remove any CHECK, UNIQUE, PK, and FK constraints specified on the pipeline " +
        "dataset.", ctx)
    }
 

sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreatePipelineDatasetAsSelectParserSuiteBase.scala

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ trait CreatePipelineDatasetAsSelectParserSuiteBase extends CommandSuiteBase {
      exception = ex,
      condition = "_LEGACY_ERROR_TEMP_0035",
      parameters = Map("message" -> ("Pipeline datasets do not currently support column " +
-        "constraints. Please remove and CHECK, UNIQUE, PK, and FK constraints specified on the " +
+        "constraints. Please remove any CHECK, UNIQUE, PK, and FK constraints specified on the " +
        "pipeline dataset.")),
      queryContext = ex.getQueryContext.map(toExpectedContext)
    )

0 commit comments
