D. J.:

nobodyPerfecZ · nobodyPerfecZ · commit 0a5943e94efb · 2025-08-20T20:10:58.000+02:00
- Added the LeetCode problem and solution for 393 (UTF-8 Validation)
diff --git a/README.md b/README.md
@@ -236,6 +236,7 @@
 - [383 Ransom Note](https://leetcode.com/problems/ransom-note/description/)
 - [389 Find the Difference](https://leetcode.com/problems/find-the-difference/description/)
 - [392 Is Subsequence](https://leetcode.com/problems/is-subsequence/description/)
+- [393 UTF-8 Validation](https://leetcode.com/problems/utf-8-validation/description/)
 - [394 Decode String](https://leetcode.com/problems/decode-string/description/)
 - [399 Evaluate Division](https://leetcode.com/problems/evaluate-division/description/)
 - [405 Convert a Number to Hexadecimal](https://leetcode.com/problems/convert-a-number-to-hexadecimal/description/)
diff --git a/awesome_python_leetcode/_393_utf_8_validation.py b/awesome_python_leetcode/_393_utf_8_validation.py
@@ -0,0 +1,76 @@
+from typing import List
+
+
+class Solution:
+    """Base class for all LeetCode Problems."""
+
+    def validUtf8(self, data: List[int]) -> bool:
+        """
+        Given an integer array data representing the data, return whether it is a valid
+        UTF-8 encoding (i.e. it translates to a sequence of valid UTF-8 encoded
+        characters).
+
+        A character in UTF8 can be from 1 to 4 bytes long, subjected to the following
+        rules:
+        - For a 1-byte character, the first bit is a 0, followed by its Unicode code.
+        - For an n-bytes character, the first n bits are all one's, the n + 1 bit is 0,
+        followed by n - 1 bytes with the most significant 2 bits being 10.
+
+        This is how the UTF-8 encoding would work:
+
+        Number of Bytes     |        UTF-8 Octet Sequence
+                            |              (binary)
+        --------------------+-----------------------------------------
+            1               |   0xxxxxxx
+            2               |   110xxxxx 10xxxxxx
+            3               |   1110xxxx 10xxxxxx 10xxxxxx
+            4               |   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+        x denotes a bit in the binary form of a byte that may be either 0 or 1.
+
+        Note: The input is an array of integers. Only the least significant 8 bits of
+        each integer is used to store the data. This means each integer represents only
+        1 byte of data.
+        """
+        i = 0
+        while i < len(data):
+            character = data[i]
+
+            # Check if 1-byte character
+            msb = (character & (2**7)) >> 7
+            if msb == 0:
+                i += 1
+                continue
+
+            # Find the first zero
+            n = 0
+            for k in range(7, 2, -1):
+                if (character & (2**k)) >> k == 0:
+                    break
+                n += 1
+
+            # Check if character is at least 2 bytes long
+            if n <= 1:
+                return False
+
+            # Check if character is at most 4 bytes long
+            if n >= 5:
+                return False
+
+            # Check if lengths matches
+            if i + n > len(data):
+                return False
+
+            # Check if n-1 bytes are correct
+            j = 1
+            while j < n:
+                next_character = data[i + j]
+                # Check msb is 1
+                if (next_character & (2**7)) >> 7 != 1:
+                    return False
+                # Check second msb is 0
+                if (next_character & (2**6)) >> 6 != 0:
+                    return False
+                j += 1
+            i += n
+        return True
diff --git a/tests/test_393_utf_8_validation.py b/tests/test_393_utf_8_validation.py
@@ -0,0 +1,20 @@
+from typing import List
+
+import pytest
+
+from awesome_python_leetcode._393_utf_8_validation import Solution
+
+
+@pytest.mark.parametrize(
+    argnames=["data", "expected"],
+    argvalues=[
+        ([197, 130, 1], True),
+        ([235, 140, 4], False),
+        ([230, 136, 145], True),
+        ([145], False),
+    ],
+)
+def test_func(data: List[int], expected: List[int]):
+    """Tests the solution of a LeetCode problem."""
+    is_valid_utf_8 = Solution().validUtf8(data)
+    assert is_valid_utf_8 is expected