66
77import org .togetherjava .tjbot .config .Config ;
88import org .togetherjava .tjbot .config .ScamBlockerConfig ;
9- import org .togetherjava .tjbot .features .utils .StringDistances ;
109
11- import java .net .URI ;
1210import java .util .Collection ;
1311import java .util .List ;
14- import java .util .Locale ;
15- import java .util .StringJoiner ;
1612import java .util .function .Predicate ;
1713import java .util .regex .Pattern ;
1814import java .util .stream .Stream ;
@@ -28,6 +24,7 @@ public final class ScamDetector {
2824 private final ScamBlockerConfig config ;
2925 private final Predicate <String > isSuspiciousAttachmentName ;
3026 private final Predicate <String > hasTrustedRole ;
27+ private final TokenAnalyse tokenAnalyse ;
3128
3229 /**
3330 * Creates a new instance with the given configuration
@@ -42,6 +39,8 @@ public ScamDetector(Config config) {
4239 .asMatchPredicate ();
4340 hasTrustedRole =
4441 Pattern .compile (this .config .getTrustedUserRolePattern ()).asMatchPredicate ();
42+
43+ tokenAnalyse = new TokenAnalyse (this .config );
4544 }
4645
4746 /**
@@ -59,10 +58,11 @@ public boolean isScam(Message message) {
5958 }
6059
6160 String content = message .getContentDisplay ();
62- List <Message .Attachment > attachments = message .getAttachments ();
61+ List <Attachment > attachments =
62+ message .getAttachments ().stream ().map (Attachment ::fromDiscord ).toList ();
6363
6464 if (content .isBlank ()) {
65- return areAttachmentsSuspicious (attachments );
65+ return areAttachmentsScam (attachments );
6666 }
6767
6868 return isScam (content );
@@ -76,158 +76,36 @@ public boolean isScam(Message message) {
7676 */
7777 public boolean isScam (CharSequence message ) {
7878 AnalyseResults results = new AnalyseResults ();
79- TOKENIZER .splitAsStream (message ).forEach (token -> analyzeToken (token , results ));
79+ TOKENIZER .splitAsStream (message ).forEach (token -> tokenAnalyse . analyze (token , results ));
8080 return isScam (results );
8181 }
8282
8383 private boolean isScam (AnalyseResults results ) {
84- if (results .pingsEveryone && (results .containsSuspiciousKeyword || results .hasUrl
85- || results .containsDollarSign )) {
84+ if (results .pingsEveryone () && (results .containsSuspiciousKeyword () || results .hasUrl ()
85+ || results .containsDollarSign () )) {
8686 return true ;
8787 }
8888
89- return Stream
90- .of (results .containsSuspiciousKeyword , results .hasSuspiciousUrl ,
91- results .containsDollarSign )
89+ boolean hasTooManySuspiciousFlags = Stream
90+ .of (results .containsSuspiciousKeyword () , results .hasSuspiciousUrl () ,
91+ results .containsDollarSign () )
9292 .filter (flag -> flag )
9393 .count () >= 2 ;
94- }
95-
96- private void analyzeToken (String token , AnalyseResults results ) {
97- if (token .isBlank ()) {
98- return ;
99- }
100-
101- if (!results .pingsEveryone
102- && ("@everyone" .equalsIgnoreCase (token ) || "@here" .equalsIgnoreCase (token ))) {
103- results .pingsEveryone = true ;
104- }
105-
106- if (!results .containsSuspiciousKeyword && containsSuspiciousKeyword (token )) {
107- results .containsSuspiciousKeyword = true ;
108- }
109-
110- if (!results .containsDollarSign && (token .contains ("$" ) || "usd" .equalsIgnoreCase (token ))) {
111- results .containsDollarSign = true ;
112- }
113-
114- if (token .startsWith ("http" )) {
115- analyzeUrl (token , results );
116- }
117- }
118-
119- private void analyzeUrl (String url , AnalyseResults results ) {
120- String host ;
121- try {
122- host = URI .create (url ).getHost ();
123- } catch (IllegalArgumentException _) {
124- // Invalid urls are not scam
125- return ;
126- }
127-
128- if (host == null ) {
129- return ;
130- }
131-
132- results .hasUrl = true ;
133-
134- if (config .getHostWhitelist ().contains (host )) {
135- return ;
136- }
137-
138- if (config .getHostBlacklist ().contains (host )) {
139- results .hasSuspiciousUrl = true ;
140- return ;
141- }
142-
143- for (String keyword : config .getSuspiciousHostKeywords ()) {
144- if (isHostSimilarToKeyword (host , keyword )) {
145- results .hasSuspiciousUrl = true ;
146- break ;
147- }
94+ if (hasTooManySuspiciousFlags ) {
95+ return true ;
14896 }
149- }
15097
151- private boolean containsSuspiciousKeyword (String token ) {
152- String preparedToken = token .toLowerCase (Locale .US );
153-
154- return config .getSuspiciousKeywords ()
155- .stream ()
156- .map (keyword -> keyword .toLowerCase (Locale .US ))
157- .anyMatch (keyword -> {
158- // Exact match "^foo$"
159- if (startsWith (keyword , '^' ) && endsWith (keyword , '$' )) {
160- return preparedToken .equals (keyword .substring (1 , keyword .length () - 1 ));
161- }
162- // Simple regex-inspired syntax "^foo"
163- if (startsWith (keyword , '^' )) {
164- return preparedToken .startsWith (keyword .substring (1 ));
165- }
166- // Simple regex-inspired syntax "foo$"
167- if (endsWith (keyword , '$' )) {
168- return preparedToken .endsWith (keyword .substring (0 , keyword .length () - 1 ));
169- }
170- return preparedToken .contains (keyword );
171- });
98+ return results .onlyContainsUrls () && results .areAllUrlsWithAttachments ()
99+ && areAttachmentsScam (results .getUrlAttachments ());
172100 }
173101
174- private boolean areAttachmentsSuspicious (Collection <? extends Message . Attachment > attachments ) {
102+ private boolean areAttachmentsScam (Collection <Attachment > attachments ) {
175103 long suspiciousAttachments =
176104 attachments .stream ().filter (this ::isAttachmentSuspicious ).count ();
177105 return suspiciousAttachments >= config .getSuspiciousAttachmentsThreshold ();
178106 }
179107
180- private boolean isAttachmentSuspicious (Message .Attachment attachment ) {
181- return attachment .isImage () && isSuspiciousAttachmentName .test (attachment .getFileName ());
182- }
183-
184- private boolean isHostSimilarToKeyword (String host , String keyword ) {
185- // NOTE This algorithm is far from optimal.
186- // It is good enough for our purpose though and not that complex.
187-
188- // Rolling window of keyword-size over host.
189- // If any window has a small distance, it is similar
190- int windowStart = 0 ;
191- int windowEnd = keyword .length ();
192- while (windowEnd <= host .length ()) {
193- String window = host .substring (windowStart , windowEnd );
194- int distance = StringDistances .editDistance (keyword , window );
195-
196- if (distance <= config .getIsHostSimilarToKeywordDistanceThreshold ()) {
197- return true ;
198- }
199-
200- windowStart ++;
201- windowEnd ++;
202- }
203-
204- return false ;
205- }
206-
207- private static boolean startsWith (CharSequence text , char prefixToTest ) {
208- return !text .isEmpty () && text .charAt (0 ) == prefixToTest ;
209- }
210-
211- private static boolean endsWith (CharSequence text , char suffixToTest ) {
212- return !text .isEmpty () && text .charAt (text .length () - 1 ) == suffixToTest ;
213- }
214-
215- private static class AnalyseResults {
216- private boolean pingsEveryone ;
217- private boolean containsSuspiciousKeyword ;
218- private boolean containsDollarSign ;
219- private boolean hasUrl ;
220- private boolean hasSuspiciousUrl ;
221-
222- @ Override
223- public String toString () {
224- return new StringJoiner (", " , AnalyseResults .class .getSimpleName () + "[" , "]" )
225- .add ("pingsEveryone=" + pingsEveryone )
226- .add ("containsSuspiciousKeyword=" + containsSuspiciousKeyword )
227- .add ("containsDollarSign=" + containsDollarSign )
228- .add ("hasUrl=" + hasUrl )
229- .add ("hasSuspiciousUrl=" + hasSuspiciousUrl )
230- .toString ();
231- }
108+ private boolean isAttachmentSuspicious (Attachment attachment ) {
109+ return attachment .isImage () && isSuspiciousAttachmentName .test (attachment .fileName ());
232110 }
233111}
0 commit comments