From 2156623ead8d44c09bcfeb86838539f593dd0659 Mon Sep 17 00:00:00 2001
From: vpostrigan <postriganvova@gmail.com>
Date: Thu, 20 Jan 2022 13:11:50 +0200
Subject: [PATCH 1/2] Fix filename mistype in comment

---
 .../lab210_schema_introspection/SchemaIntrospectionApp.java  | 5 +++--
 .../SchemaIntrospectionScalaApp.scala                        | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/main/java/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionApp.java b/src/main/java/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionApp.java
index 3735f9a..b2180a4 100644
--- a/src/main/java/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionApp.java
+++ b/src/main/java/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionApp.java
@@ -39,8 +39,9 @@ private void start() {
         .master("local")
         .getOrCreate();
 
-    // Reads a CSV file with header, called books.csv, stores it in a
-    // dataframe
+    // Reads a CSV file with header, called
+    // Restaurants_in_Wake_County_NC.csv,
+    // stores it in a dataframe
     Dataset<Row> df = spark.read().format("csv")
         .option("header", "true")
         .load("data/Restaurants_in_Wake_County_NC.csv");
diff --git a/src/main/scala/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionScalaApp.scala b/src/main/scala/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionScalaApp.scala
index f7cba1b..65e7465 100644
--- a/src/main/scala/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionScalaApp.scala
+++ b/src/main/scala/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionScalaApp.scala
@@ -22,8 +22,9 @@ object SchemaIntrospectionScalaApp {
     val spark = SparkSession.builder.appName("Schema introspection for restaurants in Wake County, NC")
       .master("local").getOrCreate
 
-    // Reads a CSV file with header, called books.csv, stores it in a
-    // dataframe
+    // Reads a CSV file with header, called
+    // Restaurants_in_Wake_County_NC.csv,
+    // stores it in a dataframe
     var df = spark.read.format("csv").option("header", "true")
       .load("data/Restaurants_in_Wake_County_NC.csv")
 

From 2e31805849df135d71557298101e9dffa5d7301d Mon Sep 17 00:00:00 2001
From: vpostrigan <postriganvova@gmail.com>
Date: Thu, 20 Jan 2022 13:21:10 +0200
Subject: [PATCH 2/2] Fix mapping between Lat/Long and X/Y

---
 .../JsonIngestionSchemaManipulationApp.java                   | 4 ++--
 .../spark/ch03/lab230_dataframe_union/DataframeUnionApp.java  | 4 ++--
 .../jsonIngestionSchemaManipulationApp.py                     | 4 ++--
 src/main/python/lab230_dataframe_union/util.py                | 4 ++--
 .../JsonIngestionSchemaManipulationScalaApp.scala             | 4 ++--
 .../ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala  | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/main/java/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationApp.java b/src/main/java/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationApp.java
index 188de74..e270fb9 100644
--- a/src/main/java/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationApp.java
+++ b/src/main/java/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationApp.java
@@ -60,8 +60,8 @@ private void start() {
         .withColumn("dateEnd", df.col("fields.closing_date"))
         .withColumn("type",
             split(df.col("fields.type_description"), " - ").getItem(1))
-        .withColumn("geoX", df.col("fields.geolocation").getItem(0))
-        .withColumn("geoY", df.col("fields.geolocation").getItem(1));
+        .withColumn("geoX", df.col("fields.geolocation").getItem(1))
+        .withColumn("geoY", df.col("fields.geolocation").getItem(0));
     df = df.withColumn("id",
         concat(df.col("state"), lit("_"),
             df.col("county"), lit("_"),
diff --git a/src/main/java/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionApp.java b/src/main/java/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionApp.java
index 3ac17a0..599325e 100644
--- a/src/main/java/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionApp.java
+++ b/src/main/java/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionApp.java
@@ -121,8 +121,8 @@ private Dataset<Row> buildDurhamRestaurantsDataframe() {
         .withColumn("dateEnd", df.col("fields.closing_date"))
         .withColumn("type",
             split(df.col("fields.type_description"), " - ").getItem(1))
-        .withColumn("geoX", df.col("fields.geolocation").getItem(0))
-        .withColumn("geoY", df.col("fields.geolocation").getItem(1))
+        .withColumn("geoX", df.col("fields.geolocation").getItem(1))
+        .withColumn("geoY", df.col("fields.geolocation").getItem(0))
         .drop(df.col("fields"))
         .drop(df.col("geometry"))
         .drop(df.col("record_timestamp"))
diff --git a/src/main/python/lab220_json_ingestion_schema_manipulation/jsonIngestionSchemaManipulationApp.py b/src/main/python/lab220_json_ingestion_schema_manipulation/jsonIngestionSchemaManipulationApp.py
index 4009536..74b39c0 100644
--- a/src/main/python/lab220_json_ingestion_schema_manipulation/jsonIngestionSchemaManipulationApp.py
+++ b/src/main/python/lab220_json_ingestion_schema_manipulation/jsonIngestionSchemaManipulationApp.py
@@ -41,8 +41,8 @@ def main(spark):
                 .withColumn("dateStart", F.col("fields.opening_date")) \
                 .withColumn("dateEnd", F.col("fields.closing_date")) \
                 .withColumn("type", F.split(F.col("fields.type_description"), " - ").getItem(1)) \
-                .withColumn("geoX", F.col("fields.geolocation").getItem(0)) \
-                .withColumn("geoY", F.col("fields.geolocation").getItem(1))
+                .withColumn("geoX", F.col("fields.geolocation").getItem(1)) \
+                .withColumn("geoY", F.col("fields.geolocation").getItem(0))
 
         df = df.withColumn("id", F.concat(F.col("state"), F.lit("_"),
                                           F.col("county"), F.lit("_"),
diff --git a/src/main/python/lab230_dataframe_union/util.py b/src/main/python/lab230_dataframe_union/util.py
index 6299b29..f0cfcf3 100644
--- a/src/main/python/lab230_dataframe_union/util.py
+++ b/src/main/python/lab230_dataframe_union/util.py
@@ -56,8 +56,8 @@ def build_durham_restaurants_dataframe(df):
             .withColumn("dateStart", F.col("fields.opening_date")) \
             .withColumn("dateEnd", F.col("fields.closing_date")) \
             .withColumn("type", F.split(F.col("fields.type_description"), " - ").getItem(1)) \
-            .withColumn("geoX", F.col("fields.geolocation").getItem(0)) \
-            .withColumn("geoY", F.col("fields.geolocation").getItem(1)) \
+            .withColumn("geoX", F.col("fields.geolocation").getItem(1)) \
+            .withColumn("geoY", F.col("fields.geolocation").getItem(0)) \
             .drop(*drop_cols)
 
     df = df.withColumn("id",
diff --git a/src/main/scala/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationScalaApp.scala b/src/main/scala/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationScalaApp.scala
index 961a0e9..18ec599 100644
--- a/src/main/scala/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationScalaApp.scala
+++ b/src/main/scala/net/jgp/books/spark/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationScalaApp.scala
@@ -42,8 +42,8 @@ object JsonIngestionSchemaManipulationScalaApp {
           .withColumn("dateStart", col("fields.opening_date"))
           .withColumn("dateEnd", col("fields.closing_date"))
           .withColumn("type", split(col("fields.type_description"), " - ").getItem(1))
-          .withColumn("geoX", col("fields.geolocation").getItem(0))
-          .withColumn("geoY", col("fields.geolocation").getItem(1))
+          .withColumn("geoX", col("fields.geolocation").getItem(1))
+          .withColumn("geoY", col("fields.geolocation").getItem(0))
 
     val cols_list = List(col("state"), lit("_"), col("county"), lit("_"), col("datasetId"))
 
diff --git a/src/main/scala/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala b/src/main/scala/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala
index 9225572..551b3fa 100644
--- a/src/main/scala/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala
+++ b/src/main/scala/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala
@@ -95,8 +95,8 @@ object DataframeUnionScalaApp {
                 .withColumn("dateStart", col("fields.opening_date"))
                 .withColumn("dateEnd", col("fields.closing_date"))
                 .withColumn("type", split(col("fields.type_description"), " - ").getItem(1))
-                .withColumn("geoX", col("fields.geolocation").getItem(0))
-                .withColumn("geoY", col("fields.geolocation").getItem(1))
+                .withColumn("geoX", col("fields.geolocation").getItem(1))
+                .withColumn("geoY", col("fields.geolocation").getItem(0))
                 .drop(drop_cols:_*)
 
     df1 = df1.withColumn("id",