@@ -4,9 +4,15 @@ import { cuss } from 'cuss'
44import { cuss as cussPt } from 'cuss/pt'
55import { cuss as cussFr } from 'cuss/fr'
66import { cuss as cussEs } from 'cuss/es'
7- import { Language } from '@horizon-rs/language-guesser'
7+ let language : any = null
88
9- const language = new Language ( )
/**
 * Returns the shared language-guesser instance, creating it on first use.
 *
 * The '@horizon-rs/language-guesser' module is loaded with a dynamic
 * `import()` only when no instance is cached yet. The constructed `Language`
 * object is stored in the module-level `language` variable and handed back
 * on every later call.
 */
async function getLanguageInstance() {
  // Fast path: an earlier call already created the instance.
  if (language) return language

  const guesserModule = await import('@horizon-rs/language-guesser')
  language = new guesserModule.Language()
  return language
}
1016
1117// Exported for the debugging CLI script
1218export const SIGNAL_RATINGS = [
@@ -48,8 +54,8 @@ export const SIGNAL_RATINGS = [
4854 {
4955 reduction : 0.2 ,
5056 name : 'not-language' ,
51- validator : ( comment : string , commentLanguage : string ) =>
52- isNotLanguage ( comment , commentLanguage ) ,
57+ validator : async ( comment : string , commentLanguage : string ) =>
58+ await isNotLanguage ( comment , commentLanguage ) ,
5359 } ,
5460 {
5561 reduction : 0.3 ,
@@ -80,7 +86,8 @@ export async function getGuessedLanguage(comment: string) {
8086 return
8187 }
8288
83- const bestGuess = language . guessBest ( comment . trim ( ) , [ ] )
89+ const lang = await getLanguageInstance ( )
90+ const bestGuess = lang . guessBest ( comment . trim ( ) , [ ] )
8491 if ( ! bestGuess ) return // Can happen if the text is just whitespace
8592 // // @horizon-rs/language-guesser is based on tri-grams and can lead
8693 // // to false positives. For example, it thinks that 'Thamk you ❤️🙏' is
@@ -98,7 +105,7 @@ export async function analyzeComment(text: string, commentLanguage = 'en') {
98105 const signals = [ ]
99106 let rating = 1.0
100107 for ( const { reduction, name, validator } of SIGNAL_RATINGS ) {
101- if ( validator ( text , commentLanguage ) ) {
108+ if ( await validator ( text , commentLanguage ) ) {
102109 signals . push ( name )
103110 rating -= reduction
104111 }
@@ -153,8 +160,9 @@ function isSingleWord(text: string) {
153160 return whitespaceSplit . length === 1
154161}
155162
156- function isNotLanguage ( text : string , language_ : string ) {
157- const bestGuess = language . guessBest ( text . trim ( ) , [ ] )
163+ async function isNotLanguage ( text : string , language_ : string ) {
164+ const lang = await getLanguageInstance ( )
165+ const bestGuess = lang . guessBest ( text . trim ( ) , [ ] )
158166 if ( ! bestGuess ) return true // Can happen if the text is just whitespace
159167 // @horizon-rs/language-guesser is based on tri-grams and can lead
160168 // to false positives. For example, it thinks that 'Thamk you ❤️🙏' is
0 commit comments