diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml new file mode 100644 index 0000000..4426529 --- /dev/null +++ b/.github/workflows/node.js.yml @@ -0,0 +1,31 @@ +# This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs + +name: Setup Modules + +on: + push: + branches: [ "BadFilter.js-V1.0" ] + pull_request: + branches: [ "BadFilter.js-V1.0" ] + +jobs: + build: + + runs-on: ubuntu-latest + + strategy: + matrix: + node-version: [18.x, 20.x, 22.x] + # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ + + steps: + - uses: actions/checkout@v3 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.node-version }} + cache: 'npm' + - run: npm ci + - run: npm run build --if-present + - run: npm test diff --git a/README.md b/README.md index 8f8de65..ab72b79 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,101 @@ - +## ๐ŸŽ‰ **Welcome to BadFilter.js!** ๐ŸŽ‰ -```Javascript hl_lines="4 9-12 25-27" +Tired of offensive language in your app? ๐Ÿšซ **BadFilter.js** to the rescue! Weโ€™ve crafted a supercharged, customizable solution that helps developers filter out inappropriate words like a pro. Let's make the internet a friendlier place one word at a time! ๐ŸŒ๐Ÿ’ฌ + +--- + +## ๐Ÿ› ๏ธ **Overview** + +**BadFilter.js** offers two powerful components: +- **FilterBadWord Class**: The brains of the operation! This utility class is responsible for filtering unwanted words from your text. +- **filters_badword Function**: A lightweight wrapper around `FilterBadWord` for easier, more streamlined usage. Think of it as the "express mode" for your text-filtering needs! 
+ +With these components, you can easily cleanse your text, ensuring a respectful, safe online environment. 🚀✨ + +--- + +## 🔧 **How It Works** +Both **FilterBadWord** and **filters_badword** combine Natural Language Processing (NLP) and Machine Learning (ML) for *accurate* and *intelligent* word filtering. + +Here's the breakdown of the magic: +1. **Tokenization**: Your text gets split into individual words (tokens) 🧩. +2. **NLP Analysis**: The tokens are scanned using cutting-edge NLP algorithms to sniff out those pesky bad words 🕵️‍♂️. +3. **Machine Learning**: Filtering gets smarter with each interaction, ensuring those inappropriate words vanish into thin air ✨. +4. **Customization**: Want to fine-tune the filter to match your app's personality? You got it! Use the handy config method to tweak things to perfection 🎛️. + +--- + +## 🚀 **Getting Started** + +### 1️⃣ **HTML Setup** +Want to use BadFilter.js in your web app? No problem! Here’s how: +```html + + + + + + +``` +Easy peasy, right? 
๐Ÿ‹ + +### 2๏ธโƒฃ **Node.js Setup** +If youโ€™re working on a Node.js project, just use this: +```javascript +const { FilterBadWord, filters_badword } = require('./badword.js'); + +// Express filtering using filters_badword +const badfilter = new FilterBadWord("FUck master"); +console.log(badfilter.clean(badfilter.position())); // Output: ***** master +``` + +### ๐ŸŽฏ **Pro Mode: filters_badword Class** +For full control over filtering, dive into the **filters_badword** class: +```javascript const badfilt = new filters_badword(); -badfilt.config(true, false); //accpet filter and error print -badfilt.words_o("FUck master"); -console.log( badfilt.cleans ); -console.log( badfilt.position() ); -//result ***** master - -badfilt.words_o("motherfucker"); -console.log( badfilt.cleans ); -console.log( badfilt.position() ); -//result ************* +badfilt.config(true, false); // Custom filtering settings +badfilt.text_o("FUck master"); // Censored output: ***** master +console.log(badfilt.cleans); +console.log(badfilt.position()); // Shows position of the bad word + +badfilt.text_o("motherfucker"); +console.log(badfilt.cleans); // Output: ************* +console.log(badfilt.position()); ``` + +๐Ÿ”ง **Config it your way!** Customize how strict or lenient your filter should be using `.config()`. Want more control? You got it! + +--- + +## ๐Ÿ’ก **Features You'll Love** + +### ๐Ÿง  **Smart Filtering** +Thanks to the NLP + ML combo, your filters get sharper over time, catching even the sneakier bad words. Say goodbye to offensive content! ๐Ÿ™…โ€โ™‚๏ธ๐Ÿšซ + +### ๐ŸŽจ **Fully Customizable** +Want to allow certain words or apply extra strict filtering? Adjust it with ease. Youโ€™re in the driverโ€™s seat of how clean your app should be. Customize the experience and make it yours! ๐Ÿ› ๏ธ + +### โšก **Blazing Fast** +We know speed matters! BadFilter.js is optimized to perform like a ninjaโ€”fast and precise. 
Your users won’t even notice the filtering happening in the background. 🌪️ + +--- + +## 🤔 **Why Choose BadFilter.js?** + +- **Accuracy**: Spot-on detection of offensive words using cutting-edge algorithms 🎯. +- **Customizable**: Shape the filter to match your app’s personality 🎛️. +- **Safe Spaces**: Build a more positive, respectful community 🌸. + +Whether you're building a chat app, a forum, or an online game, **BadFilter.js** will ensure everyone has a good time without the drama! 🎉 + +--- + +## 🎬 **Wrap-Up** + +With **BadFilter.js**, you're one step closer to making the internet a more positive place. Say goodbye to offensive language and hello to a world of friendly interactions. Let's get filtering, and have some fun while we're at it! ✨ + +Go on, give it a spin and watch the magic unfold in your app! 🚀 diff --git a/badword.js b/badword.js index deb1d89..ae27097 100644 --- a/badword.js +++ b/badword.js @@ -1,13 +1,80 @@ +/*! + * BadFilter.js - A JavaScript utility for filtering offensive or unwanted words + * Copyright (c) 2023 LcfherShell + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + + + +function escapeRegExp(strings){ + let data = strings.trim().toLowerCase().split("|").filter(Boolean); + for (let index = 0; index < data.length; index++) { + const element = data[index]; + if (!((element.includes("(") && element.includes(")")) || + (element.includes("[") && element.includes("]")) ) ){ + data[index] = data[index].replace(/[.*+?^${}()|[\]\\]/g, '\\$&'). + replace(/[a4]/g, "[a4]").replace(/[s5]/g, "[s5]").replace("i", "[i1]"). + replace("l", "[l1]").replace(/[o0]/g, "[o0]").replace(/[e3]/g, "[e3]"). + replace(/[b8]/g, "[b8]").replace(/[kx]/g, "[kx]"); + }; + } + data = new RegExp(data.join("|")); + return data.source; +}; + + +function validateInput(type, value) { + let regex; + switch (type) { + case 'email': + // Regex kompleks untuk email + regex = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.(com|net|org|edu|gov|mil|co|info|io|biz|id|us|uk|ca|au|de|fr|es|it|jp|cn|br|in|ru|mx|kr|za|nl|se|no|fi|dk|pl|pt|ar|ch|hk|sg|my|th|vn|ae|at|be|cz|hu|ro|bg|gr|lt|lv|sk|si|ee|cy)(\.[a-zA-Z]{2,})?$/; + break; + case 'phone': + // Regex kompleks untuk nomor telepon (contoh: +1-234-567-8900, (123) 456-7890, 123-456-7890, 1234567890) + regex = /^(?:\+?(\d{1,3}))?[-. 
]?(\(?\d{1,4}?\)?)[-.\s]?(\d{1,4})[-.\s]?(\d{1,4})[-.\s]?(\d{1,9})$/; + break; + case 'url': + // Regex kompleks untuk URL + regex = /^(https?:\/\/)?(www\.)?([a-zA-Z0-9-]+\.[a-zA-Z]{2,})(\/[^\s]*)?$/; + break; + default: + return false; // Tipe tidak valid + } + return regex.test(value); +}; class FilterBadWord{ - constructor(word = ""){ + constructor(text = "", customFilter="", customSubFilter=""){ - this.word = word; + this._text = text; - this.filt = /bashfull*|kill*|fuck*|drug*|dick*|fk/gi; + this._filt = /[b8][[a4][s5]hfu[l1][l1]*|k[i1][l1][l1]*|fuck*|dr[uo]g*|d[i1]ck*|fk/gi; - this.subfilter = /ass|lip|pussy*|suck*|mother*|mom*|dog*|low*|sex*/gi; - + this._subfilter = /ass|lip|pussy*|suck*|mother*|mom*|dog*|low*|sex*/gi; + if (customFilter.length>3){ + this._filt = new RegExp(this._filt.source+"|"+escapeRegExp(customFilter), "gi"); + }; + if (customSubFilter.length>3){ + this._subfilter = new RegExp(this._subfilter.source+"|"+escapeRegExp(customSubFilter), "gi"); + }; } @@ -72,7 +139,7 @@ class FilterBadWord{ //if ( typeof position != "number" ) { //position = parseInt(position); //} - this.positionList = this.constructor.position_static(this.word.toString(), this.filt); + this.positionList = this.constructor.position_static(this._text.toString(), this._filt); return this.positionList; @@ -92,7 +159,7 @@ class FilterBadWord{ if (check != null || check != 0) { - var word = this.word.toLowerCase(); + var word = this._text.toLowerCase(); function before_str(number , key){ @@ -107,11 +174,11 @@ class FilterBadWord{ for (var i = 0; i < check.length; i++) { - const word_s = this.constructor.getboundPosition(this.word.toLowerCase().toString() , check[i]); + const word_s = this.constructor.getboundPosition(this._text.toLowerCase().toString() , check[i]); before = before_str(0 , word_s).toString().split(" "); - after = after_Str(word_s, this.word).toString().split(" "); + after = after_Str(word_s, this._text).toString().split(" "); //console.log(word.indexOf(word_s)); 
if (after.length >= 1 ){ @@ -141,9 +208,9 @@ class FilterBadWord{ try{ - if (before[before.length-1].match(this.subfilter) != null) { + if (before[before.length-1].match(this._subfilter) != null) { - check_repr = before[before.length-1].match(this.subfilter); + check_repr = before[before.length-1].match(this._subfilter); if (check_repr != before[before.length-1]) { //check ulang jika sensore tidak memenuhi persyaratan @@ -164,9 +231,9 @@ class FilterBadWord{ } - else if (after[0].match(this.subfilter) != null){ + else if (after[0].match(this._subfilter) != null){ - check_repr = after[0].match(this.subfilter); + check_repr = after[0].match(this._subfilter); if (check_repr != after[0]) { @@ -187,9 +254,9 @@ class FilterBadWord{ } - else if (after[1].match(this.subfilter) != null){ + else if (after[1].match(this._subfilter) != null){ - check_repr = after[1].match(this.subfilter); + check_repr = after[1].match(this._subfilter); if (check_repr != after[1]) { arry.push("Toxic"); @@ -214,7 +281,7 @@ class FilterBadWord{ } catch(err){ - if ( this.word.match(this.filt) != null) { + if ( this._text.match(this._filt) != null) { arry.push("Toxic"); @@ -256,23 +323,23 @@ class FilterBadWord{ var word, process, output, sensore; - word = this.word.split(" "); + word = this._text.split(" "); sensore = "*"; process = position.forEach( number => { - const get_word = this.constructor.getboundPosition(this.word.toString() , number); + const get_word = this.constructor.getboundPosition(this._text.toString() , number); for (var i = 0; i < word.length; i++) { - for (var x = 0; x < get_word.length; x++) { + for (var x = 0; x < get_word.length-1; x++) { sensore += "*"; }; - word[i] = word[i].replace(get_word, sensore); + if (!(validateInput("email", word[i]) || validateInput("url", word[i]))) word[i] = word[i].replace(get_word, sensore); sensore = "*"; @@ -286,7 +353,7 @@ class FilterBadWord{ //position.forEach( async(number) => { - //const get_word = await 
this.constructor.getboundPosition(this.word.toString() , number); + //const get_word = await this.constructor.getboundPosition(this._text.toString() , number); //for (var i = 0; i < word.length; i++) { @@ -306,17 +373,22 @@ class FilterBadWord{ class filters_badword extends FilterBadWord{ - ['words_o'](word){ + ['text_o'](text){ - this.word = word.toString(); + this._text = text.toString(); } - ['config'](cl=true, smart=true, er=false){ + ['config'](cl=true, smart=true, customFilter="", customSubFilter=""){ this.cl = cl; this.st = smart; - this.er = er; + if (customFilter.length>3){ + this._filt = new RegExp(this._filt.source+"|"+escapeRegExp(customFilter), "gi"); + }; + if (customSubFilter.length>3){ + this._subfilter = new RegExp(this._subfilter.source+"|"+escapeRegExp(customSubFilter), "gi"); + }; } get ['cleans'](){ @@ -345,7 +417,7 @@ class filters_badword extends FilterBadWord{ } else{ - return this.word.trim(); + return this._text.trim(); } @@ -357,4 +429,40 @@ class filters_badword extends FilterBadWord{ } +}; +// Definisikan objek ekspor +const exportsObject = { + /** + * FilterBadWord class: class for filtering bad words + *@param {string} text - The text to filter + *@param {string} customFilter - List of bad words + *@param {string} customSubFilter - List of bad sub words + */ + FilterBadWord, + /** + * filters_badword class: a simpler class to filter bad words + * which uses the FilterBadWord class. To use it you have to call the config function + */ + filters_badword +}; + +// Periksa lingkungan eksekusi +const isNode = typeof exports === 'object' && typeof module !== 'undefined'; + +// Ekspor ke lingkungan yang sesuai +//isNode ? 
module.exports = exportsObject : Object.assign(window, exportsObject); +// Ekspor ke lingkungan yang sesuai +if (isNode) { + // Jika di Node.js, gunakan module.exports + module.exports = exportsObject; +} else { + // Jika di browser, periksa dukungan ES Modules + if (typeof window.customElements !== "undefined") { + // Dukungan untuk ES Modules, gunakan `export` + window.exportsObject = exportsObject; // Menyimpan di objek window + Object.assign(window, exportsObject); + } else { + // Jika tidak mendukung ES Modules, gunakan Object.assign + Object.assign(window, exportsObject); + } } diff --git a/badword.ts b/badword.ts new file mode 100644 index 0000000..8c40c06 --- /dev/null +++ b/badword.ts @@ -0,0 +1,213 @@ +/*! + * BadFilter.js - A JavaScript utility for filtering offensive or unwanted words + * Copyright (c) 2023 LcfherShell + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
/**
 * Look-alike substitutions applied to plain custom words: each listed character
 * is replaced by a character class that also accepts its common "leet" spelling.
 */
const LOOKALIKE_CLASSES: Readonly<Record<string, string>> = {
  a: "[a4]",
  "4": "[a4]",
  s: "[s5]",
  "5": "[s5]",
  i: "[i1]",
  l: "[l1]",
  o: "[o0]",
  "0": "[o0]",
  e: "[e3]",
  "3": "[e3]",
  b: "[b8]",
  "8": "[b8]",
  k: "[kx]",
  x: "[kx]",
};

/**
 * Turns a `|`-separated list of custom words into a regular-expression source.
 *
 * Each term is lower-cased and trimmed. A term that contains a `(`…`)` or
 * `[`…`]` pair is assumed to already be a regex fragment and is passed through
 * untouched; every other term is escaped and then widened with the look-alike
 * classes above (every occurrence, not only the first one).
 *
 * @param strings - Custom words separated by `|`.
 * @returns The combined pattern source, or `""` when the list holds no usable
 *   term (so callers never append an empty alternative that matches everything).
 * @throws SyntaxError when a passed-through fragment is not a valid pattern.
 */
function escapeRegExp(strings: string): string {
  const patterns = strings
    .toLowerCase()
    .split("|")
    .map((term) => term.trim())
    .filter(Boolean)
    .map((term) => {
      const looksLikeRegex =
        (term.includes("(") && term.includes(")")) ||
        (term.includes("[") && term.includes("]"));
      if (looksLikeRegex) {
        return term;
      }
      return term
        .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
        // Single pass, so characters inside an inserted class are never re-wrapped.
        .replace(/[a4s5ilo0e3b8kx]/g, (ch) => LOOKALIKE_CLASSES[ch] ?? ch);
    });

  if (patterns.length === 0) {
    return "";
  }
  // Round-trip through RegExp to validate the pattern and normalise its source.
  return new RegExp(patterns.join("|")).source;
}

/**
 * Returns `base` extended with the custom word list, or `base` itself when the
 * list is too short (3 characters or fewer, as before) or has no usable term.
 */
function extendPattern(base: RegExp, custom: string): RegExp {
  if (custom.length <= 3) {
    return base;
  }
  const extra = escapeRegExp(custom);
  return extra === "" ? base : new RegExp(base.source + "|" + extra, "gi");
}

/**
 * Detects and masks bad words in a space-separated text.
 *
 * Positions handled by this class are character offsets into the text: the
 * offset at which a flagged word starts.
 */
class FilterBadWord {
  protected _text: string; // text currently being inspected
  protected _filt: RegExp; // primary bad-word pattern
  protected _subfilter: RegExp; // pattern for aggravating neighbour words

  /**
   * @param text - The text to filter.
   * @param customFilter - Extra bad words, `|`-separated (ignored when 3 characters or fewer).
   * @param customSubFilter - Extra neighbour words, `|`-separated (ignored when 3 characters or fewer).
   */
  constructor(text: string = "", customFilter: string = "", customSubFilter: string = "") {
    this._text = text;
    this._filt = extendPattern(
      /[b8][a4][s5]hfu[l1][l1]*|k[i1][l1][l1]*|fuck*|dr[uo]g*|d[i1]ck*|fk/gi,
      customFilter,
    );
    this._subfilter = extendPattern(
      /[a4][s5][s5]|[l1][i1]p|pu[s5][s5]y[*]?|[s5]uck[*]?|m[o0]th[e3]r[*]?|m[o0]m[*]?|d[o0]g[*]?|l[o0]w[*]?|s[e3]x[*]?/gi,
      customSubFilter,
    );
  }

  /**
   * Finds the space-delimited word that contains the given character offset.
   * An offset that points at a space is resolved to the word before it.
   *
   * @returns `[start, end)` offsets of that word.
   */
  private wordBounds(position: number): [number, number] {
    const paragraph = this._text;
    let start = position;
    while (start > 0 && paragraph[start] === " ") start--;
    start = paragraph.lastIndexOf(" ", start) + 1;
    const nextSpace = paragraph.indexOf(" ", start);
    return [start, nextSpace === -1 ? paragraph.length : nextSpace];
  }

  /** Returns the word that contains the given character offset. */
  private getBoundPosition(position: number): string {
    const [start, end] = this.wordBounds(position);
    return this._text.substring(start, end);
  }

  /**
   * Scans the text word by word and collects the start offset of every word
   * that matches the bad-word pattern.
   */
  private positionStatic(): number[] {
    const positions: number[] = [];
    let offset = 0;
    for (const word of this._text.split(" ")) {
      // `search` ignores the `g` flag and `lastIndex`, so the shared regex stays stateless.
      if (word.search(this._filt) !== -1) {
        positions.push(offset);
      }
      offset += word.length + 1; // +1 for the separating space
    }
    return positions;
  }

  /**
   * @returns Character offsets (start of word) of every flagged word, in text order.
   */
  public position(): number[] {
    return this.positionStatic();
  }

  /**
   * Classifies the text.
   *
   * @returns `false` when no bad word is present; `["Toxic", 1, neighbour]`
   *   (neighbour lower-cased) when the word before, the word after, or the
   *   second word after a bad word matches the neighbour pattern; otherwise
   *   `["Notoxic", 0]`.
   */
  public get thisToxic(): (string | number)[] | false {
    const hits = this.position();
    if (hits.length === 0) {
      return false;
    }

    for (const hit of hits) {
      const [start, end] = this.wordBounds(hit);
      // Slice around the actual hit instead of searching for its text again,
      // so repeated or mixed-case words resolve to the right neighbours.
      const before = this._text.slice(0, start).trim().split(" ");
      const after = this._text.slice(end).trim().split(" ");
      const neighbours: Array<string | undefined> = [
        before[before.length - 1],
        after[0],
        after[1],
      ];
      for (const neighbour of neighbours) {
        if (neighbour !== undefined && neighbour.search(this._subfilter) !== -1) {
          return ["Toxic", 1, neighbour.toLowerCase()];
        }
      }
    }

    return ["Notoxic", 0];
  }

  /** The verdict is read-only: assigning to it throws the assigned value (legacy behaviour). */
  set thisToxic(key: unknown) {
    throw key;
  }

  /**
   * Masks the words found at the given character offsets with `*`.
   * Offsets that are not integers or fall outside the text are ignored.
   *
   * @param position - Offsets as returned by {@link FilterBadWord.position}.
   * @returns The text with each targeted word replaced by asterisks of equal length.
   */
  public clean(position: readonly number[]): string {
    let words = this._text.split(" ");

    for (const offset of position) {
      if (!Number.isInteger(offset) || offset < 0 || offset >= this._text.length) {
        continue;
      }
      const target = this.getBoundPosition(offset);
      if (target === "") {
        continue;
      }
      const mask = "*".repeat(target.length);
      words = words.map((word) => word.replace(target, mask));
    }

    return words.join(" ");
  }
}

/**
 * Convenience wrapper around {@link FilterBadWord}: set the text with
 * `text_o`, tune behaviour with `config`, read the result from `cleans`.
 */
class filters_badword extends FilterBadWord {
  protected cl: boolean; // when false, `cleans` returns the text unfiltered
  protected st: boolean; // "smart" flag; stored but not consulted in this file

  constructor(cl: boolean = true, st: boolean = true) {
    super();
    this.cl = cl;
    this.st = st;
  }

  /** Replaces the text to inspect. */
  public text_o(text: string): void {
    this._text = text.toString();
  }

  /**
   * Updates the flags and optionally appends custom word lists to the patterns.
   *
   * @param cl - Enable cleaning in `cleans`.
   * @param smart - Stored as the "smart" flag.
   * @param customFilter - Extra bad words, `|`-separated (ignored when 3 characters or fewer).
   * @param customSubFilter - Extra neighbour words, `|`-separated (ignored when 3 characters or fewer).
   */
  public config(
    cl: boolean = true,
    smart: boolean = true,
    customFilter: string = "",
    customSubFilter: string = "",
  ): void {
    this.cl = cl;
    this.st = smart;
    this._filt = extendPattern(this._filt, customFilter);
    this._subfilter = extendPattern(this._subfilter, customSubFilter);
  }

  /**
   * @returns The text with bad words masked and, when the text is classified
   *   as toxic, the offending neighbour word masked as well. With cleaning
   *   disabled the trimmed text is returned unchanged.
   */
  public get cleans(): string {
    if (!this.cl) {
      return this._text.trim();
    }

    const cleaned = this.clean(this.position());
    const verdict = this.thisToxic;
    if (!Array.isArray(verdict) || verdict[1] !== 1) {
      return cleaned;
    }

    const neighbour = verdict[2];
    if (typeof neighbour !== "string" || neighbour === "") {
      return cleaned;
    }

    // Mask whole words only, compared case-insensitively, so "Ass" is caught
    // and an unrelated word that merely contains the neighbour is left alone.
    return cleaned
      .split(" ")
      .map((word) => (word.toLowerCase() === neighbour ? "*".repeat(word.length) : word))
      .join(" ");
  }

  /** The cleaned text is read-only: assigning to it throws the assigned value (legacy behaviour). */
  set cleans(value: string) {
    throw value;
  }
}

export {
  /**
   * FilterBadWord class: class for filtering bad words.
   * Constructor arguments: the text to filter, a list of bad words and a list
   * of bad sub words.
   */
  FilterBadWord,
  /**
   * filters_badword class: a simpler class to filter bad words
   * which uses the FilterBadWord class. To use it you have to call the config function.
   */
  filters_badword,
};
To use it you have to call the config function + */ + filters_badword +}; diff --git a/package.json b/package.json new file mode 100644 index 0000000..2aa47cd --- /dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "name": "BadFilter.js", + "version": "1.0.0", + "type": "module", + "description": "This script functions to detect and clean abusive or toxic words in text.", + "main": "badword.js", + "scripts": { + "test": "mocha" + }, + "devDependencies": { + "mocha": "^10.0.0", + "chai": "^4.3.0" + }, + "keywords": ["filter", "words", "profanity", "abusive", "toxic", "censored"], + "author": "LcfherShell", + "license": "MIT", + "engines": { + "node": ">=8.0.0" + } +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..cff4f60 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "es6", + "module": "commonjs", + "outDir": "./", + "rootDir": "./", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "declaration": true + }, + "include": [ + "*.ts" + ], + "exclude": [ + "node_modules" + ] +}