import Foundation
import Hub
1111
public class BertTokenizer {
    private let basicTokenizer = BasicTokenizer()
    private let wordpieceTokenizer: WordpieceTokenizer
    private let maxLen = 512
@@ -18,12 +18,12 @@ class BertTokenizer {
    private let vocab: [String: Int]
    private let ids_to_tokens: [Int: String]

    public var bosToken: String?
    public var bosTokenId: Int?
    public var eosToken: String?
    public var eosTokenId: Int?
2525
    public init(vocab: [String: Int],
                merges: [String]?,
                tokenizeChineseChars: Bool = true,
                bosToken: String? = nil,
@@ -39,7 +39,7 @@ class BertTokenizer {
        self.eosTokenId = eosToken == nil ? nil : vocab[eosToken!]
    }
4141
    public required convenience init(tokenizerConfig: Config, tokenizerData: Config, addedTokens: [String: Int]) throws {
        guard let vocab = tokenizerData.model?.vocab?.dictionary as? [String: Int] else {
            throw TokenizerError.missingVocab
        }
@@ -51,7 +51,7 @@ class BertTokenizer {
    }

    public func tokenize(text: String) -> [String] {
        let text = tokenizeChineseCharsIfNeed(text)
        var tokens: [String] = []
        for token in basicTokenizer.tokenize(text: text) {
@@ -128,21 +128,21 @@ class BertTokenizer {
// MARK: - PreTrainedTokenizerModel conformance
extension BertTokenizer: PreTrainedTokenizerModel {
    /// The out-of-vocabulary token used by the wordpiece tokenizer (typically "[UNK]").
    public var unknownToken: String? { wordpieceTokenizer.unkToken }

    /// The vocabulary id of `unknownToken`, or `nil` when it is absent from `vocab`.
    /// Uses `flatMap` instead of force-unwrapping `unknownToken`, so a missing
    /// unknown token yields `nil` rather than a crash.
    public var unknownTokenId: Int? { unknownToken.flatMap { vocab[$0] } }

    /// Tokenizes `text` and maps every resulting token to its vocabulary id.
    /// NOTE(review): marked `public` to match the other protocol witnesses in this
    /// extension — an internal method cannot satisfy a requirement of a public
    /// protocol on a public type.
    public func encode(text: String) -> [Int] { tokenizeToIds(text: text) }

    /// Converts token ids back into wordpiece tokens, then reassembles them into
    /// a plain string via `convertWordpieceToBasicTokenList`.
    public func decode(tokens: [Int]) -> String {
        let tokens = unTokenize(tokens: tokens)
        return convertWordpieceToBasicTokenList(tokens)
    }

    /// Returns the vocabulary id for `token`, falling back to the unknown-token id
    /// when the token is out of vocabulary.
    public func convertTokenToId(_ token: String) -> Int? {
        return vocab[token] ?? unknownTokenId
    }

    /// Returns the token string for `id`, or `nil` if the id is not in the vocabulary.
    public func convertIdToToken(_ id: Int) -> String? {
        return ids_to_tokens[id]
    }
}