
Commit 44dcc3f

Merge branch 'master' into repartition-reader-single-cf
2 parents 3f9d88a + dce992b

44 files changed: +1787 −558 lines changed


common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java

Lines changed: 1 addition & 0 deletions
@@ -787,6 +787,7 @@ public enum LogKeys implements LogKey {
   STREAM_CHUNK_ID,
   STREAM_ID,
   STREAM_NAME,
+  STREAM_SHOULD_FORCE_SNAPSHOT,
   SUBMISSION_ID,
   SUBSAMPLING_RATE,
   SUB_QUERY,

common/utils/src/main/scala/org/apache/spark/util/SparkSystemUtils.scala

Lines changed: 11 additions & 0 deletions
@@ -39,6 +39,16 @@ private[spark] trait SparkSystemUtils {
    */
   val javaVersion = JavaUtils.javaVersion
 
+  /**
+   * Whether the underlying Java version is at most 17.
+   */
+  val isJavaVersionAtMost17 = Runtime.version().feature() <= 17
+
+  /**
+   * Whether the underlying Java version is at least 21.
+   */
+  val isJavaVersionAtLeast21 = Runtime.version().feature() >= 21
+
   /**
    * Whether the underlying operating system is Windows.
    */
@@ -63,6 +73,7 @@ private[spark] trait SparkSystemUtils {
    * Whether the underlying operating system is UNIX.
    */
   val isUnix = JavaUtils.isUnix
+
 }
 
 object SparkSystemUtils extends SparkSystemUtils

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleDatabaseOnDocker.scala

Lines changed: 2 additions & 2 deletions
@@ -23,12 +23,12 @@ class OracleDatabaseOnDocker extends DatabaseOnDocker with Logging {
   // sarutak/oracle-free is a custom fork of gvenzl/oracle-free which allows to set timeout for
   // password initialization. See SPARK-54076 for details.
   lazy override val imageName =
-    sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "sarutak/oracle-free:23.9-slim")
+    sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "sarutak/oracle-free:23.26.0-slim")
   val oracle_password = "Th1s1sThe0racle#Pass"
   override val env = Map(
     "ORACLE_PWD" -> oracle_password, // oracle images uses this
     "ORACLE_PASSWORD" -> oracle_password, // gvenzl/oracle-free uses this
-    "PASSWORD_INIT_TIMEOUT" -> "30"
+    "PASSWORD_INIT_TIMEOUT" -> "60"
   )
   override val usesIpc = false
   override val jdbcPort: Int = 1521

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 0 additions & 10 deletions
@@ -1855,16 +1855,6 @@ private[spark] object Utils
     getHadoopFileSystem(new URI(path), conf)
   }
 
-  /**
-   * Whether the underlying Java version is at most 17.
-   */
-  val isJavaVersionAtMost17 = Runtime.version().feature() <= 17
-
-  /**
-   * Whether the underlying Java version is at least 21.
-   */
-  val isJavaVersionAtLeast21 = Runtime.version().feature() >= 21
-
   /**
    * Whether the underlying JVM prefer IPv6 addresses.
    */

pom.xml

Lines changed: 1 addition & 1 deletion
@@ -220,7 +220,7 @@
     <netty.version>4.2.7.Final</netty.version>
     <netty-tcnative.version>2.0.74.Final</netty-tcnative.version>
     <icu4j.version>77.1</icu4j.version>
-    <junit.version>6.0.0</junit.version>
+    <junit.version>6.0.1</junit.version>
     <jline.version>2.14.6</jline.version>
     <!--
       SPARK-50299: When updating `sbt-jupiter-interface.version`,

python/pyspark/sql/connect/client/core.py

Lines changed: 14 additions & 70 deletions
@@ -99,8 +99,9 @@
 )
 from pyspark.sql.connect.observation import Observation
 from pyspark.sql.connect.utils import get_python_ver
-from pyspark.sql.pandas.types import _create_converter_to_pandas, from_arrow_schema
-from pyspark.sql.types import DataType, StructType, _has_type
+from pyspark.sql.pandas.types import from_arrow_schema
+from pyspark.sql.pandas.conversion import _convert_arrow_table_to_pandas
+from pyspark.sql.types import DataType, StructType
 from pyspark.util import PythonEvalType
 from pyspark.storagelevel import StorageLevel
 from pyspark.errors import PySparkValueError, PySparkAssertionError, PySparkNotImplementedError
@@ -987,88 +988,31 @@ def to_pandas(
         # Get all related configs in a batch
         (
             timezone,
-            struct_in_pandas,
-            self_destruct,
+            structHandlingMode,
+            selfDestruct,
         ) = self.get_configs(
             "spark.sql.session.timeZone",
             "spark.sql.execution.pandas.structHandlingMode",
             "spark.sql.execution.arrow.pyspark.selfDestruct.enabled",
         )
 
         table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
-            req, observations, self_destruct == "true"
+            req, observations, selfDestruct == "true"
         )
         assert table is not None
         ei = ExecutionInfo(metrics, observed_metrics)
 
         schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True)
         assert schema is not None and isinstance(schema, StructType)
 
-        # Rename columns to avoid duplicated column names during processing
-        temp_col_names = [f"col_{i}" for i in range(len(schema.names))]
-        table = table.rename_columns(temp_col_names)
-
-        # Pandas DataFrame created from PyArrow uses datetime64[ns] for date type
-        # values, but we should use datetime.date to match the behavior with when
-        # Arrow optimization is disabled.
-        pandas_options = {"coerce_temporal_nanoseconds": True}
-        if self_destruct == "true" and table.num_rows > 0:
-            # Configure PyArrow to use as little memory as possible:
-            # self_destruct - free columns as they are converted
-            # split_blocks - create a separate Pandas block for each column
-            # use_threads - convert one column at a time
-            pandas_options.update(
-                {
-                    "self_destruct": True,
-                    "split_blocks": True,
-                    "use_threads": False,
-                }
-            )
-
-        if len(schema.names) > 0:
-            error_on_duplicated_field_names: bool = False
-            if struct_in_pandas == "legacy" and any(
-                _has_type(f.dataType, StructType) for f in schema.fields
-            ):
-                error_on_duplicated_field_names = True
-                struct_in_pandas = "dict"
-
-            # SPARK-51112: If the table is empty, we avoid using pyarrow to_pandas to create the
-            # DataFrame, as it may fail with a segmentation fault.
-            if table.num_rows == 0:
-                # For empty tables, create empty Series with converters to preserve dtypes
-                pdf = pd.concat(
-                    [
-                        _create_converter_to_pandas(
-                            field.dataType,
-                            field.nullable,
-                            timezone=timezone,
-                            struct_in_pandas=struct_in_pandas,
-                            error_on_duplicated_field_names=error_on_duplicated_field_names,
-                        )(pd.Series([], name=temp_col_names[i], dtype="object"))
-                        for i, field in enumerate(schema.fields)
-                    ],
-                    axis="columns",
-                )
-            else:
-                pdf = pd.concat(
-                    [
-                        _create_converter_to_pandas(
-                            field.dataType,
-                            field.nullable,
-                            timezone=timezone,
-                            struct_in_pandas=struct_in_pandas,
-                            error_on_duplicated_field_names=error_on_duplicated_field_names,
-                        )(arrow_col.to_pandas(**pandas_options))
-                        for arrow_col, field in zip(table.columns, schema.fields)
-                    ],
-                    axis="columns",
-                )
-            # Restore original column names (including duplicates)
-            pdf.columns = schema.names
-        else:
-            # empty columns
-            pdf = table.to_pandas(**pandas_options)
+        pdf = _convert_arrow_table_to_pandas(
+            arrow_table=table,
+            schema=schema,
+            timezone=timezone,
+            struct_handling_mode=structHandlingMode,
+            date_as_object=False,
+            self_destruct=selfDestruct == "true",
+        )
 
         if len(metrics) > 0:
             pdf.attrs["metrics"] = metrics