Skip to content

Commit cfe1f20

Browse files
committed
optimizations
1 parent ea9fed4 commit cfe1f20

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

+bert/+tokenizer/+internal/BasicTokenizer.m

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,11 @@
3434
u = this.cleanText(u);
3535
u = this.tokenizeCJK(u);
3636
text = u.string();
37-
origTokens = this.whiteSpaceTokenize(text);
3837
if this.IgnoreCase
39-
origTokens = lower(origTokens);
40-
origTokens = textanalytics.unicode.nfd(origTokens);
38+
text = lower(text);
39+
text = textanalytics.unicode.nfd(text);
4140
end
42-
u = textanalytics.unicode.UTF32(origTokens);
41+
u = textanalytics.unicode.UTF32(text);
4342
cats = u.characterCategories('Granularity','detailed');
4443
if this.IgnoreCase
4544
[u,cats] = this.stripAccents(u,cats);

+bert/+tokenizer/+internal/WordPieceTokenizer.m

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,9 @@
6363
if start>1
6464
sub.Data = [uint32('##'),sub.Data];
6565
end
66-
if this.Vocab.isVocabularyWord(sub.string())
67-
currentSub = sub.string();
66+
strForm = sub.string();
67+
if this.Vocab.isVocabularyWord(strForm)
68+
currentSub = strForm;
6869
break
6970
end
7071
finish = finish-1;

0 commit comments

Comments
 (0)