From 1dbba833f7ce56178306704455d953c9861f30ca Mon Sep 17 00:00:00 2001
From: Albert Mir
Date: Thu, 10 Nov 2022 00:23:15 +0100
Subject: [PATCH] fix: Set default tokenizer language to actual language parameter.

---
 rake_nltk/rake.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/rake_nltk/rake.py b/rake_nltk/rake.py
index b208264..6f18d41 100644
--- a/rake_nltk/rake.py
+++ b/rake_nltk/rake.py
@@ -8,6 +8,7 @@
 import string
 from collections import Counter, defaultdict
 from enum import Enum
+from functools import partial
 from itertools import chain, groupby, product
 from typing import Callable, DefaultDict, Dict, List, Optional, Set, Tuple
 
@@ -105,7 +106,7 @@ def __init__(
         if sentence_tokenizer:
             self.sentence_tokenizer = sentence_tokenizer
         else:
-            self.sentence_tokenizer = nltk.tokenize.sent_tokenize
+            self.sentence_tokenizer = partial(nltk.tokenize.sent_tokenize, language=language)
         self.word_tokenizer: Callable[[str], List[str]]
         if word_tokenizer:
             self.word_tokenizer = word_tokenizer