Skip to content

Commit c5ec9a5

Browse files
committed
Refactor tests and update-tables a small bit
1 parent 3caf1d7 commit c5ec9a5

File tree

2 files changed

+82
-104
lines changed

2 files changed

+82
-104
lines changed

bin/update-tables.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@
6868
*range(0xD7B0, 0xD800), # Hangul Jungseong O-Yeo .. Undefined Character of Hangul Jamo Extended-B
6969
)
7070

71+
HEX_STR_VS15 = 'FE0E'
72+
HEX_STR_VS16 = 'FE0F'
7173

7274
def _bisearch(ucs, table):
7375
"""A copy of wcwidth._bisearch, to prevent having issues when depending on code that imports
@@ -433,7 +435,6 @@ def fetch_table_vs16_data() -> UnicodeTableRenderCtx:
433435
"""
434436
table: dict[UnicodeVersion, TableDef] = {}
435437
unicode_latest = fetch_unicode_versions()[-1]
436-
hex_str_vs = 'FE0F'
437438

438439
wide_tables = fetch_table_wide_data().table
439440
unicode_version = UnicodeVersion.parse('9.0.0')
@@ -442,13 +443,13 @@ def fetch_table_vs16_data() -> UnicodeTableRenderCtx:
442443
# 15.1.0) and parse a single file for all individual releases
443444
table[unicode_version] = parse_vs_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest),
444445
ubound_unicode_version=unicode_version,
445-
hex_str_vs=hex_str_vs)
446+
hex_str_vs=HEX_STR_VS16)
446447

447448
# parse and join the final emoji release 12.0 of the earlier "type"
448449
table[unicode_version].values.update(
449450
parse_vs_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(),
450451
ubound_unicode_version=unicode_version,
451-
hex_str_vs=hex_str_vs).values)
452+
hex_str_vs=HEX_STR_VS16).values)
452453

453454
# perform culling on any values that are already understood as 'wide'
454455
# without the variation-16 selector
@@ -488,7 +489,6 @@ def fetch_table_vs15_data() -> UnicodeTableRenderCtx:
488489
"""
489490
table: dict[UnicodeVersion, TableDef] = {}
490491
unicode_latest = fetch_unicode_versions()[-1]
491-
hex_str_vs = 'FE0E'
492492

493493
wide_tables = fetch_table_wide_data().table
494494
unicode_version = UnicodeVersion.parse('9.0.0')
@@ -497,13 +497,13 @@ def fetch_table_vs15_data() -> UnicodeTableRenderCtx:
497497
# 15.1.0) and parse a single file for all individual releases
498498
table[unicode_version] = parse_vs_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest),
499499
ubound_unicode_version=unicode_version,
500-
hex_str_vs=hex_str_vs)
500+
hex_str_vs=HEX_STR_VS15)
501501

502502
# parse and join the final emoji release 12.0 of the earlier "type"
503503
table[unicode_version].values.update(
504504
parse_vs_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(),
505505
ubound_unicode_version=unicode_version,
506-
hex_str_vs=hex_str_vs).values)
506+
hex_str_vs=HEX_STR_VS15).values)
507507

508508
# perform culling on any values that are already understood as 'narrow'
509509
# without the variation-15 selector

tests/test_emojis.py

Lines changed: 76 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -174,133 +174,111 @@ def test_recommended_emoji_zwj_sequences():
174174
assert num >= 1468
175175

176176

177-
def test_recommended_variation_16_sequences():
177+
@pytest.mark.parametrize('vs_char,expected_width', [
178+
('\ufe0f', 2),
179+
('\ufe0e', 1),
180+
])
181+
def test_recommended_variation_sequences(vs_char, expected_width):
178182
"""
179-
Test wcswidth of vs-16 sequences from unicode.org's emoji-variation-sequences.txt
183+
Test wcswidth of variation selector sequences from emoji-variation-sequences.txt
180184
"""
181-
# given,
182185
lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
183186

184187
errors = []
185188
num = 0
186189
for sequence, line in zip(sequences, lines):
187190
num += 1
188-
if '\ufe0f' not in sequence:
189-
# filter for only \uFE0F (VS-16)
191+
if vs_char not in sequence:
190192
continue
191193
measured_width = wcwidth.wcswidth(sequence)
192-
if measured_width != 2:
194+
if measured_width != expected_width:
193195
errors.append({
194-
'expected_width': 2,
196+
'expected_width': expected_width,
195197
'line': line,
196-
'measured_width': wcwidth.wcswidth(sequence),
198+
'measured_width': measured_width,
197199
'sequence': sequence,
198200
})
199201

200-
# verify
201202
assert errors == []
202203
assert num >= 742
203204

204205

205-
def test_recommended_variation_15_sequences():
206-
"""
207-
Test wcswidth of vs-15 sequences from unicode.org's emoji-variation-sequences.txt
206+
@pytest.mark.parametrize('unicode_version,base_char,vs_char,base_width,expect_phrase_width', [
207+
('9.0', '\u2640', '\uFE0F', 1, 3),
208+
('9.0', '\U0001f4da', '\uFE0E', 2, 2),
209+
('8.0', '\u2640', '\uFE0F', 1, 2),
210+
('8.0', '\U0001f4da', '\uFE0E', 1, 2),
211+
])
212+
def test_variation_selector_unicode_version(unicode_version, base_char, vs_char, base_width, expect_phrase_width):
208213
"""
209-
# given,
210-
lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
211-
212-
errors = []
213-
num = 0
214-
for sequence, line in zip(sequences, lines):
215-
num += 1
216-
if '\ufe0e' not in sequence:
217-
# filter for only \uFE0E (VS-15)
218-
continue
219-
measured_width = wcwidth.wcswidth(sequence)
220-
if measured_width != 1:
221-
errors.append({
222-
'expected_width': 1,
223-
'line': line,
224-
'measured_width': wcwidth.wcswidth(sequence),
225-
'sequence': sequence,
226-
})
227-
228-
# verify
229-
assert errors == []
230-
assert num >= 742
231-
232-
233-
def test_unicode_9_vs16():
234-
"""Verify effect of VS-16 on unicode_version 9.0 and later"""
235-
phrase = ("\u2640" # FEMALE SIGN
236-
"\uFE0F" # VARIATION SELECTOR-16
237-
"X" # ASCII Letter 'X'
238-
"\uFE0F") # VARIATION SELECTOR-16
239-
240-
expect_length_each = (1, 0, 1, 0)
241-
expect_length_phrase = 3
242-
243-
# exercise,
244-
length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
245-
length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')
246-
247-
# verify.
248-
assert length_each == expect_length_each
249-
assert length_phrase == expect_length_phrase
250-
251-
252-
def test_unicode_9_vs15():
253-
"""Verify effect of VS-15 on unicode_version 9.0 and later"""
254-
phrase = ("\U0001f4da" # BOOKS
255-
"\uFE0E" # VARIATION SELECTOR-15
256-
"X" # ASCII Letter 'X'
257-
"\uFE0E") # VARIATION SELECTOR-15
214+
Test variation selector behavior across Unicode versions.
258215
259-
expect_length_each = (2, 0, 1, 0)
260-
expect_length_phrase = 2
216+
VS-16 and VS-15 should affect width in Unicode 9.0+, but not in 8.0 and earlier.
217+
"""
218+
phrase = base_char + vs_char + "X" + vs_char
219+
expect_length_each = (base_width, 0, 1, 0)
261220

262-
# exercise,
263-
length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
264-
length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')
221+
length_each = tuple(wcwidth.wcwidth(w_char, unicode_version=unicode_version) for w_char in phrase)
222+
length_phrase = wcwidth.wcswidth(phrase, unicode_version=unicode_version)
265223

266-
# verify.
267224
assert length_each == expect_length_each
268-
assert length_phrase == expect_length_phrase
269-
270-
271-
def test_unicode_8_vs16():
272-
"""Verify that VS-16 has no effect on unicode_version 8.0 and earlier"""
273-
phrase = ("\u2640" # FEMALE SIGN
274-
"\uFE0F" # VARIATION SELECTOR-16
275-
"X" # ASCII Letter 'X'
276-
"\uFE0F") # VARIATION SELECTOR-16
225+
assert length_phrase == expect_phrase_width
226+
227+
228+
@pytest.mark.parametrize('char,expected_base_width,expected_vs15_width,description', [
229+
('\u231A', 2, 1, 'WATCH'),
230+
('\u231B', 2, 1, 'HOURGLASS'),
231+
('\u2648', 2, 1, 'ARIES'),
232+
('\u26A1', 2, 1, 'HIGH VOLTAGE SIGN'),
233+
('\U0001F4DA', 2, 1, 'BOOKS'),
234+
('\U0001F3E0', 2, 1, 'HOUSE BUILDING'),
235+
('\u0023', 1, 1, 'NUMBER SIGN'),
236+
('\u002A', 1, 1, 'ASTERISK'),
237+
('\u00A9', 1, 1, 'COPYRIGHT SIGN'),
238+
])
239+
def test_vs15_width_effects(char, expected_base_width, expected_vs15_width, description):
240+
"""
241+
Test VS-15 width effects on various characters.
277242
278-
expect_length_each = (1, 0, 1, 0)
279-
expect_length_phrase = 2
243+
Wide chars (2→1): VS-15 converts to narrow text presentation
244+
Narrow chars (1→1): VS-15 has no effect, already narrow
245+
"""
246+
width_alone = wcwidth.wcswidth(char, unicode_version='9.0')
247+
width_with_vs15 = wcwidth.wcswidth(char + '\uFE0E', unicode_version='9.0')
280248

281-
# exercise,
282-
length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
283-
length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')
249+
assert width_alone == expected_base_width
250+
assert width_with_vs15 == expected_vs15_width
284251

285-
# verify.
286-
assert length_each == expect_length_each
287-
assert length_phrase == expect_length_phrase
288252

253+
def test_vs15_vs16_symmetry():
254+
"""Verify VS-15 narrows a default-wide dual-presentation char while VS-16 leaves it wide"""
255+
watch = '\u231A'
289256

290-
def test_unicode_8_vs15():
291-
"""Verify that VS-15 has no effect on unicode_version 8.0 and earlier"""
292-
phrase = ("\U0001f4da" # BOOKS
293-
"\uFE0E" # VARIATION SELECTOR-15
294-
"X" # ASCII Letter 'X'
295-
"\uFE0E") # VARIATION SELECTOR-15
257+
width_base = wcwidth.wcswidth(watch, unicode_version='9.0')
258+
width_vs15 = wcwidth.wcswidth(watch + '\uFE0E', unicode_version='9.0')
259+
width_vs16 = wcwidth.wcswidth(watch + '\uFE0F', unicode_version='9.0')
296260

297-
expect_length_each = (1, 0, 1, 0)
298-
expect_length_phrase = 2
261+
assert width_base == 2
262+
assert width_vs15 == 1
263+
assert width_vs16 == 2
299264

300-
# exercise,
301-
length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
302-
length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')
303265

304-
# verify.
305-
assert length_each == expect_length_each
306-
assert length_phrase == expect_length_phrase
266+
def test_vs15_multiple_in_sequence():
267+
"""Verify multiple VS-15 applications in a single string"""
268+
phrase = (
269+
'\u231A\uFE0E' # WATCH + VS15 (wide -> narrow)
270+
'X' # ASCII
271+
'\U0001F4DA\uFE0E' # BOOKS + VS15 (wide -> narrow)
272+
'Y' # ASCII
273+
'\u2648\uFE0E' # ARIES + VS15 (wide -> narrow)
274+
)
275+
276+
width = wcwidth.wcswidth(phrase, unicode_version='9.0')
277+
assert width == 5
278+
279+
280+
def test_vs15_without_preceding_char():
281+
"""Verify VS-15 without a preceding measurable character has width 0"""
282+
phrase = '\uFE0E'
283+
width = wcwidth.wcwidth(phrase, unicode_version='9.0')
284+
assert width == 0

0 commit comments

Comments
 (0)