Skip to content

Commit 0a5943e

Browse files
committed
D. J.:
- Added the leetcode problem and solution for 393
1 parent 3e2a3fd commit 0a5943e

File tree

3 files changed

+97
-0
lines changed

3 files changed

+97
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@
236236
- [383 Ransom Note](https://leetcode.com/problems/ransom-note/description/)
237237
- [389 Find the Difference](https://leetcode.com/problems/find-the-difference/description/)
238238
- [392 Is Subsequence](https://leetcode.com/problems/is-subsequence/description/)
239+
- [393 UTF-8 Validation](https://leetcode.com/problems/utf-8-validation/description/)
239240
- [394 Decode String](https://leetcode.com/problems/decode-string/description/)
240241
- [399 Evaluate Division](https://leetcode.com/problems/evaluate-division/description/)
241242
- [405 Convert a Number to Hexadecimal](https://leetcode.com/problems/convert-a-number-to-hexadecimal/description/)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from typing import List
2+
3+
4+
class Solution:
    """Container for the LeetCode 393 (UTF-8 Validation) solution."""

    def validUtf8(self, data: List[int]) -> bool:
        """
        Return whether ``data`` is a structurally valid UTF-8 byte sequence.

        A character in UTF-8 is 1 to 4 bytes long and follows these rules:

        - A 1-byte character has its first bit set to 0.
        - An n-byte character starts with n one-bits followed by a zero-bit,
          and is followed by n - 1 bytes whose two most significant bits
          are ``10``.

            Number of Bytes | UTF-8 Octet Sequence (binary)
            ----------------+-------------------------------------
                  1         | 0xxxxxxx
                  2         | 110xxxxx 10xxxxxx
                  3         | 1110xxxx 10xxxxxx 10xxxxxx
                  4         | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

        ``x`` denotes a bit that may be either 0 or 1.

        Only the bit patterns above are checked; the decoded code point
        itself is not inspected.

        Args:
            data: Integers that each carry one byte. Only the least
                significant 8 bits of every integer are examined.

        Returns:
            True if every character in ``data`` matches one of the patterns
            above and no multi-byte character is cut short; False otherwise.
            An empty list yields True.
        """
        continuation_mask = 0xC0  # top two bits of a byte
        continuation_tag = 0x80  # continuation bytes look like 10xxxxxx

        size = len(data)
        pos = 0
        while pos < size:
            lead = data[pos]

            # A cleared top bit marks a complete 1-byte character.
            if (lead & 0x80) == 0:
                pos += 1
                continue

            # Count the run of leading one-bits, inspecting bits 7 down to 3.
            # A count of 5 means the byte starts with 11111 and is too long.
            width = 0
            for bit in range(7, 2, -1):
                if (lead & (1 << bit)) == 0:
                    break
                width += 1

            # width == 1 is a stray continuation byte (10xxxxxx) in lead
            # position; width == 5 exceeds the 4-byte maximum.
            if width < 2 or width > 4:
                return False

            # The character must not run past the end of the input.
            if pos + width > size:
                return False

            # Each of the following width - 1 bytes must be 10xxxxxx.
            followers = data[pos + 1:pos + width]
            if any((b & continuation_mask) != continuation_tag for b in followers):
                return False

            pos += width
        return True

tests/test_393_utf_8_validation.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from typing import List
2+
3+
import pytest
4+
5+
from awesome_python_leetcode._393_utf_8_validation import Solution
6+
7+
8+
@pytest.mark.parametrize(
    argnames=["data", "expected"],
    argvalues=[
        # 2-byte character followed by a 1-byte character.
        ([197, 130, 1], True),
        # 3-byte lead whose second continuation byte is not 10xxxxxx.
        ([235, 140, 4], False),
        # Single well-formed 3-byte character.
        ([230, 136, 145], True),
        # Lone continuation byte in lead position.
        ([145], False),
    ],
)
def test_func(data: List[int], expected: bool) -> None:
    """Check that ``Solution.validUtf8`` returns ``expected`` for ``data``."""
    is_valid_utf_8 = Solution().validUtf8(data)
    assert is_valid_utf_8 is expected

0 commit comments

Comments
 (0)