@@ -58,6 +58,8 @@ pub enum Error {
5858 BadEntropyBitCount ( usize ) ,
5959 /// The mnemonic has an invalid checksum.
6060 InvalidChecksum ,
61+ /// The word list can be interpreted as multiple languages.
62+ AmbiguousWordList ( Vec < Language > ) ,
6163}
6264
6365impl fmt:: Display for Error {
@@ -74,6 +76,7 @@ impl fmt::Display for Error {
7476 "entropy was not between 128-256 bits or not a multiple of 32 bits: {} bits" , c,
7577 ) ,
7678 Error :: InvalidChecksum => write ! ( f, "the mnemonic has an invalid checksum" ) ,
79+ Error :: AmbiguousWordList ( ref langs) => write ! ( f, "ambiguous word list: {:?}" , langs) ,
7780 }
7881 }
7982}
@@ -216,47 +219,50 @@ impl Mnemonic {
216219 Ok ( ( ) )
217220 }
218221
219- /// Guess the language of the mnemonic based on the first word.
222+ /// Determine the language of the mnemonic based on the first word.
220223 ///
221- /// This works as official word lists are made as such that a word never
222- /// appears in two different word lists.
223- pub fn guess_language ( s : & str ) -> Result < Language , Error > {
224- let languages = [
225- Language :: English ,
226- #[ cfg( feature = "chinese-simplified" ) ]
227- Language :: SimplifiedChinese ,
228- #[ cfg( feature = "chinese-traditional" ) ]
229- Language :: TraditionalChinese ,
230- #[ cfg( feature = "czech" ) ]
231- Language :: Czech ,
232- #[ cfg( feature = "french" ) ]
233- Language :: French ,
234- #[ cfg( feature = "italian" ) ]
235- Language :: Italian ,
236- #[ cfg( feature = "japanese" ) ]
237- Language :: Japanese ,
238- #[ cfg( feature = "korean" ) ]
239- Language :: Korean ,
240- #[ cfg( feature = "spanish" ) ]
241- Language :: Spanish ,
242- ] ;
224+ /// Some word lists don't guarantee that their words don't occur in other
225+ /// word lists. In the extremely unlikely case that a word list can be
226+ /// interpreted in multiple languages, an [Error::AmbiguousWordList] is
227+ /// returned, containing the possible languages.
228+ pub fn language_of ( s : & str ) -> Result < Language , Error > {
229+ // First we try wordlists that have guaranteed unique words.
243230 let first_word = s. split_whitespace ( ) . next ( ) . unwrap ( ) ;
244231 if first_word. len ( ) == 0 {
245232 return Err ( Error :: BadWordCount ( 0 ) ) ;
246233 }
247- for language in & languages {
234+ for language in Language :: all ( ) . iter ( ) . filter ( |l| l . unique_words ( ) ) {
248235 if language. find_word ( first_word) . is_some ( ) {
249236 return Ok ( * language) ;
250237 }
251238 }
252- Err ( Error :: UnknownWord ( first_word. to_owned ( ) ) )
239+
240+ // If that didn't work, we start with all possible languages
241+ // (those without unique words), and eliminate until there is
242+ // just one left.
243+ let mut langs: Vec < _ > =
244+ Language :: all ( ) . iter ( ) . filter ( |l| !l. unique_words ( ) ) . cloned ( ) . collect ( ) ;
245+ for word in s. split_whitespace ( ) {
246+ langs. retain ( |l| l. find_word ( word) . is_some ( ) ) ;
247+
248+ // If there is just one language left, return it.
249+ if langs. len ( ) == 1 {
250+ return Ok ( langs[ 0 ] ) ;
251+ }
252+
253+ // If all languages were eliminated, it's an invalid word.
254+ if langs. is_empty ( ) {
255+ return Err ( Error :: UnknownWord ( word. to_owned ( ) ) )
256+ }
257+ }
258+ Err ( Error :: AmbiguousWordList ( langs) )
253259 }
254260
255261 /// Parse a mnemonic and detect the language from the enabled languages.
256262 pub fn parse < ' a , S : Into < Cow < ' a , str > > > ( s : S ) -> Result < Mnemonic , Error > {
257263 let mut cow = s. into ( ) ;
258264 Mnemonic :: normalize_utf8_cow ( & mut cow) ;
259- let language = Mnemonic :: guess_language ( cow. as_ref ( ) ) ?;
265+ let language = Mnemonic :: language_of ( cow. as_ref ( ) ) ?;
260266 Mnemonic :: validate_in ( language, cow. as_ref ( ) ) ?;
261267 Ok ( Mnemonic ( cow. into_owned ( ) ) )
262268 }
@@ -309,7 +315,7 @@ impl Mnemonic {
309315 // We unwrap errors here because this method can only be called on
310316 // values that were already previously validated.
311317
312- let language = Mnemonic :: guess_language ( self . as_str ( ) ) . unwrap ( ) ;
318+ let language = Mnemonic :: language_of ( self . as_str ( ) ) . unwrap ( ) ;
313319
314320 // Preallocate enough space for the longest possible word list
315321 let mut entropy = Vec :: with_capacity ( 33 ) ;
@@ -361,6 +367,15 @@ mod tests {
361367
362368 use bitcoin_hashes:: hex:: FromHex ;
363369
// A mnemonic generated in a given language must be detected as that language.
#[cfg(feature = "rand")]
#[test]
fn test_language_of() {
    for &expected in Language::all().iter() {
        let generated = Mnemonic::generate_in(expected, 24).unwrap();
        let detected = Mnemonic::language_of(generated.as_str()).unwrap();
        assert_eq!(expected, detected);
    }
}
378+
364379 #[ test]
365380 fn test_vectors_english ( ) {
366381 // These vectors are tuples of
0 commit comments