Added tasks 2879-2883

ThanhNIT · web-flow · commit cff5c3c4f0b6 · 2023-12-23T10:25:18.000+02:00
diff --git a/src/main/java/g2801_2900/s2879_display_the_first_three_rows/readme.md b/src/main/java/g2801_2900/s2879_display_the_first_three_rows/readme.md
@@ -0,0 +1,43 @@
+2879\. Display the First Three Rows
+
+Easy
+
+DataFrame: `employees` 
+
+    +-------------+--------+ 
+    | Column Name | Type   | 
+    +-------------+--------+ 
+    | employee_id | int    | 
+    | name        | object | 
+    | department  | object | 
+    | salary      | int    | 
+    +-------------+--------+
+
+Write a solution to display the **first `3`** rows of this DataFrame.
+
+**Example 1:**
+
+**Input:** DataFrame employees 
+
+    +-------------+-----------+-----------------------+--------+ 
+    | employee_id | name      | department            | salary | 
+    +-------------+-----------+-----------------------+--------+ 
+    | 3           | Bob       | Operations            | 48675  | 
+    | 90          | Alice     | Sales                 | 11096  | 
+    | 9           | Tatiana   | Engineering           | 33805  | 
+    | 60          | Annabelle | InformationTechnology | 37678  | 
+    | 49          | Jonathan  | HumanResources        | 23793  | 
+    | 43          | Khaled    | Administration        | 40454  | 
+    +-------------+-----------+-----------------------+--------+
+
+**Output:** 
+
+    +-------------+---------+-------------+--------+ 
+    | employee_id | name    | department  | salary | 
+    +-------------+---------+-------------+--------+ 
+    | 3           | Bob     | Operations  | 48675  | 
+    | 90          | Alice   | Sales       | 11096  | 
+    | 9           | Tatiana | Engineering | 33805  | 
+    +-------------+---------+-------------+--------+
+
+**Explanation:** Only the first 3 rows are displayed.
diff --git a/src/main/java/g2801_2900/s2879_display_the_first_three_rows/solution.py b/src/main/java/g2801_2900/s2879_display_the_first_three_rows/solution.py
@@ -0,0 +1,7 @@
+# #Easy #2023_12_23_Time_406_ms_(96.44%)_Space_60.8_MB_(5.67%)
+
+import pandas as pd
+
+def selectFirstRows(zs: pd.DataFrame) -> pd.DataFrame:
+    """Return the first three rows of ``zs`` (every row when it holds fewer than three)."""
+    return zs.head(n=3)
diff --git a/src/main/java/g2801_2900/s2880_select_data/readme.md b/src/main/java/g2801_2900/s2880_select_data/readme.md
@@ -0,0 +1,38 @@
+2880\. Select Data
+
+Easy
+
+DataFrame students 
+
+    +-------------+--------+ 
+    | Column Name | Type   | 
+    +-------------+--------+ 
+    | student_id  | int    | 
+    | name        | object | 
+    | age         | int    | 
+    +-------------+--------+
+
+Write a solution to select the name and age of the student with `student_id = 101`.
+
+The result format is in the following example.
+
+**Example 1: Input:** 
+
+    +------------+---------+-----+ 
+    | student_id | name    | age | 
+    +------------+---------+-----+ 
+    | 101        | Ulysses | 13  | 
+    | 53         | William | 10  | 
+    | 128        | Henry   | 6   | 
+    | 3          | Henry   | 11  | 
+    +------------+---------+-----+
+
+**Output:** 
+
+    +---------+-----+ 
+    | name    | age | 
+    +---------+-----+ 
+    | Ulysses | 13  | 
+    +---------+-----+
+
+**Explanation:** Student Ulysses has student_id = 101, we select the name and age.
diff --git a/src/main/java/g2801_2900/s2880_select_data/solution.py b/src/main/java/g2801_2900/s2880_select_data/solution.py
@@ -0,0 +1,7 @@
+# #Easy #2023_12_23_Time_428_ms_(94.99%)_Space_60_MB_(83.82%)
+
+import pandas as pd
+
+def selectData(students: pd.DataFrame) -> pd.DataFrame:
+    """Return the ``name`` and ``age`` columns of the rows whose ``student_id`` equals 101."""
+    return students.loc[students['student_id'] == 101, ['name', 'age']]
diff --git a/src/main/java/g2801_2900/s2881_create_a_new_column/readme.md b/src/main/java/g2801_2900/s2881_create_a_new_column/readme.md
@@ -0,0 +1,48 @@
+2881\. Create a New Column
+
+Easy
+
+DataFrame `employees` 
+
+    +-------------+--------+ 
+    | Column Name | Type   | 
+    +-------------+--------+ 
+    | name        | object | 
+    | salary      | int    | 
+    +-------------+--------+
+
+A company plans to provide its employees with a bonus.
+
+Write a solution to create a new column named `bonus` that contains the **doubled values** of the `salary` column.
+
+The result format is in the following example.
+
+**Example 1:**
+
+**Input:** DataFrame employees 
+
+    +---------+--------+ 
+    | name    | salary | 
+    +---------+--------+ 
+    | Piper   | 4548   | 
+    | Grace   | 28150  | 
+    | Georgia | 1103   | 
+    | Willow  | 6593   | 
+    | Finn    | 74576  | 
+    | Thomas  | 24433  | 
+    +---------+--------+
+
+**Output:** 
+
+    +---------+--------+--------+ 
+    | name    | salary | bonus  | 
+    +---------+--------+--------+ 
+    | Piper   | 4548   | 9096   | 
+    | Grace   | 28150  | 56300  | 
+    | Georgia | 1103   | 2206   | 
+    | Willow  | 6593   | 13186  | 
+    | Finn    | 74576  | 149152 | 
+    | Thomas  | 24433  | 48866  | 
+    +---------+--------+--------+
+
+**Explanation:** A new column bonus is created by doubling the value in the column salary.
diff --git a/src/main/java/g2801_2900/s2881_create_a_new_column/solution.py b/src/main/java/g2801_2900/s2881_create_a_new_column/solution.py
@@ -0,0 +1,7 @@
+# #Easy #2023_12_23_Time_437_ms_(81.98%)_Space_60.3_MB_(37.80%)
+
+import pandas as pd
+
+def createBonusColumn(employees: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of ``employees`` with a ``bonus`` column (twice ``salary``); the input is left unmodified."""
+    return employees.assign(bonus=employees["salary"] * 2)
diff --git a/src/main/java/g2801_2900/s2882_drop_duplicate_rows/readme.md b/src/main/java/g2801_2900/s2882_drop_duplicate_rows/readme.md
@@ -0,0 +1,48 @@
+2882\. Drop Duplicate Rows
+
+Easy
+
+DataFrame customers 
+
+    +-------------+--------+ 
+    | Column Name | Type   | 
+    +-------------+--------+ 
+    | customer_id | int    | 
+    | name        | object | 
+    | email       | object | 
+    +-------------+--------+
+
+There are some duplicate rows in the DataFrame based on the `email` column.
+
+Write a solution to remove these duplicate rows and keep only the **first** occurrence.
+
+The result format is in the following example.
+
+**Example 1:** 
+
+**Input:** 
+
+    +-------------+---------+---------------------+ 
+    | customer_id | name    | email               | 
+    +-------------+---------+---------------------+ 
+    | 1           | Ella    | emily@example.com   | 
+    | 2           | David   | michael@example.com | 
+    | 3           | Zachary | sarah@example.com   | 
+    | 4           | Alice   | john@example.com    | 
+    | 5           | Finn    | john@example.com    | 
+    | 6           | Violet  | alice@example.com   | 
+    +-------------+---------+---------------------+ 
+
+**Output:** 
+
+    +-------------+---------+---------------------+ 
+    | customer_id | name    | email               | 
+    +-------------+---------+---------------------+ 
+    | 1           | Ella    | emily@example.com   | 
+    | 2           | David   | michael@example.com | 
+    | 3           | Zachary | sarah@example.com   | 
+    | 4           | Alice   | john@example.com    | 
+    | 6           | Violet  | alice@example.com   | 
+    +-------------+---------+---------------------+
+
+**Explanation:** Alice (customer_id = 4) and Finn (customer_id = 5) both use john@example.com, so only the first occurrence of this email is retained.
diff --git a/src/main/java/g2801_2900/s2882_drop_duplicate_rows/solution.py b/src/main/java/g2801_2900/s2882_drop_duplicate_rows/solution.py
@@ -0,0 +1,7 @@
+# #Easy #2023_12_23_Time_405_ms_(97.36%)_Space_60.2_MB_(75.25%)
+
+import pandas as pd
+
+def dropDuplicateEmails(customers: pd.DataFrame) -> pd.DataFrame:
+    """Return ``customers`` keeping only the first row for each ``email``; the input is left unmodified."""
+    return customers.drop_duplicates(subset='email', keep='first')
diff --git a/src/main/java/g2801_2900/s2883_drop_missing_data/readme.md b/src/main/java/g2801_2900/s2883_drop_missing_data/readme.md
@@ -0,0 +1,44 @@
+2883\. Drop Missing Data
+
+Easy
+
+DataFrame students 
+
+    +-------------+--------+ 
+    | Column Name | Type   | 
+    +-------------+--------+ 
+    | student_id  | int    | 
+    | name        | object | 
+    | age         | int    | 
+    +-------------+--------+
+
+There are some rows having missing values in the `name` column.
+
+Write a solution to remove the rows with missing values.
+
+The result format is in the following example.
+
+**Example 1:**
+
+**Input:** 
+
+    +------------+---------+-----+ 
+    | student_id | name    | age | 
+    +------------+---------+-----+ 
+    | 32         | Piper   | 5   | 
+    | 217        | None    | 19  | 
+    | 779        | Georgia | 20  | 
+    | 849        | Willow  | 14  | 
+    +------------+---------+-----+
+
+**Output:** 
+
+    +------------+---------+-----+ 
+    | student_id | name    | age | 
+    +------------+---------+-----+ 
+    | 32         | Piper   | 5   | 
+    | 779        | Georgia | 20  | 
+    | 849        | Willow  | 14  | 
+    +------------+---------+-----+
+
+**Explanation:** The student with id 217 has an empty value in the name column, so that row is removed.
diff --git a/src/main/java/g2801_2900/s2883_drop_missing_data/solution.py b/src/main/java/g2801_2900/s2883_drop_missing_data/solution.py
@@ -0,0 +1,8 @@
+# #Easy #2023_12_23_Time_429_ms_(94.97%)_Space_61.2_MB_(12.30%)
+
+import pandas as pd
+
+def dropMissingData(students: pd.DataFrame) -> pd.DataFrame:
+    """Return the rows of ``students`` whose ``name`` is not missing; the input is left unmodified."""
+    # dropna() without inplace already builds a new frame, so no defensive re-wrap is needed.
+    return students.dropna(subset=['name'])