File tree Expand file tree Collapse file tree 2 files changed +6
-6
lines changed
+bert/+tokenizer/+internal Expand file tree Collapse file tree 2 files changed +6
-6
lines changed Original file line number Diff line number Diff line change 3434 u = this .cleanText(u );
3535 u = this .tokenizeCJK(u );
3636 text = u .string();
37- origTokens = this .whiteSpaceTokenize(text );
3837 if this .IgnoreCase
39- origTokens = lower(origTokens );
40- origTokens = textanalytics .unicode .nfd(origTokens );
38+ text = lower(text );
39+ text = textanalytics .unicode .nfd(text );
4140 end
42- u = textanalytics .unicode .UTF32(origTokens );
41+ u = textanalytics .unicode .UTF32(text );
4342 cats = u .characterCategories(' Granularity' ,' detailed' );
4443 if this .IgnoreCase
4544 [u ,cats ] = this .stripAccents(u ,cats );
Original file line number Diff line number Diff line change 6363 if start > 1
6464 sub.Data = [uint32(' ##' ),sub .Data ];
6565 end
66- if this .Vocab .isVocabularyWord(sub .string())
67- currentSub = sub .string();
66+ strForm = sub .string();
67+ if this .Vocab .isVocabularyWord(strForm )
68+ currentSub = strForm ;
6869 break
6970 end
7071 finish = finish - 1 ;
You can’t perform that action at this time.
0 commit comments