Skip to content

Commit b62a272

Browse files
committed
feat: new module unicode
add `fixSurrogates` and `unfixSurrogates` functions
1 parent f4d83f4 commit b62a272

File tree

6 files changed

+163
-2
lines changed

6 files changed

+163
-2
lines changed

cli.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ const usageString = `
2727
shift
2828
shiftBits
2929
toMANS
30+
unicode.fixSurrogates
31+
unicode.unfixSurrogates
3032
3133
Examples
3234
$ string-mutilator shift "Hello World!" 6

readme.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ More or less just for fun.
4343
<dt><a href="#module_stringMutilator">stringMutilator</a></dt>
4444
<dd><p>Functions for mutilating strings.</p>
4545
</dd>
46+
<dt><a href="#module_stringMutilator/unicode">stringMutilator/unicode</a></dt>
47+
<dd><p>Functions for handling Unicode stuff.</p>
48+
</dd>
4649
</dl>
4750

4851
## Functions
@@ -240,6 +243,39 @@ stringMutilator.compressor.signature('䡥汬漠坯牬搡');
240243
## stringMutilator
241244
Functions for mutilating strings.
242245

246+
<a name="module_stringMutilator/unicode"></a>
247+
248+
## stringMutilator/unicode
249+
Functions for handling Unicode stuff.
250+
251+
252+
* [stringMutilator/unicode](#module_stringMutilator/unicode)
253+
* [~fixSurrogates(string)](#module_stringMutilator/unicode..fixSurrogates) ⇒ <code>string</code>
254+
* [~unfixSurrogates(string)](#module_stringMutilator/unicode..unfixSurrogates) ⇒ <code>string</code>
255+
256+
<a name="module_stringMutilator/unicode..fixSurrogates"></a>
257+
258+
### stringMutilator/unicode~fixSurrogates(string) ⇒ <code>string</code>
259+
Fix unpaired high/low surrogates by adding a blank high/low surrogate
260+
(U+D800 or U+DC00) to the required location.
261+
262+
**Kind**: inner method of [<code>stringMutilator/unicode</code>](#module_stringMutilator/unicode)
263+
264+
| Param | Type | Description |
265+
| --- | --- | --- |
266+
| string | <code>string</code> | The input string |
267+
268+
<a name="module_stringMutilator/unicode..unfixSurrogates"></a>
269+
270+
### stringMutilator/unicode~unfixSurrogates(string) ⇒ <code>string</code>
271+
Remove the blank high/low surrogates added by `fixSurrogates`.
272+
273+
**Kind**: inner method of [<code>stringMutilator/unicode</code>](#module_stringMutilator/unicode)
274+
275+
| Param | Type | Description |
276+
| --- | --- | --- |
277+
| string | <code>string</code> | The input string |
278+
243279
<a name="flipBits"></a>
244280

245281
## flipBits(string) ⇒ <code>string</code>
@@ -532,6 +568,8 @@ $ string-mutilator --help
532568
shift
533569
shiftBits
534570
toMANS
571+
unicode.fixSurrogates
572+
unicode.unfixSurrogates
535573
536574
Examples
537575
$ string-mutilator shift "Hello World!" 6

src/index.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import scramble from './scramble';
1616
import shift from './shift';
1717
import shiftBits from './shift-bits';
1818
import toMANS from './to-mans';
19+
import * as unicode from './unicode';
1920

2021
export default
2122
{
@@ -31,7 +32,8 @@ export default
3132
scramble,
3233
shift,
3334
shiftBits,
34-
toMANS
35+
toMANS,
36+
unicode
3537
};
3638

3739
export
@@ -48,5 +50,6 @@ export
4850
scramble,
4951
shift,
5052
shiftBits,
51-
toMANS
53+
toMANS,
54+
unicode
5255
};

src/unicode.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* Functions for handling Unicode stuff.
3+
* @module stringMutilator/unicode
4+
*/
5+
6+
/**
7+
* Checks if the given `char` is a high surrogate (U+D800 - U+DBFF).
8+
* @private
9+
* @param {string} char
10+
* @returns {boolean}
11+
*/
12+
const isHighSurrogate = char =>
{
	// Stringify the argument; only a lone UTF-16 code unit can qualify.
	const unit = `${char}`;

	if (unit.length !== 1)
	{
		return false;
	}

	const code = unit.charCodeAt(0);

	return code >= 0xD800 && code <= 0xDBFF;
};
13+
14+
/**
15+
* Checks if the given `char` is a low surrogate (U+DC00 - U+DFFF).
16+
* @private
17+
* @param {string} char
18+
* @returns {boolean}
19+
*/
20+
const isLowSurrogate = char =>
{
	// Stringify the argument; only a lone UTF-16 code unit can qualify.
	const unit = `${char}`;

	if (unit.length !== 1)
	{
		return false;
	}

	const code = unit.charCodeAt(0);

	return code >= 0xDC00 && code <= 0xDFFF;
};
21+
22+
/**
 * Give every unpaired surrogate in `string` a blank partner: a high
 * surrogate that is not followed by a low one gets U+DC00 appended, and a
 * low surrogate that is not preceded by a high one gets U+D800 prepended.
 * Surrogates that already form a high/low pair are left untouched.
 * @param {string} string The input string
 * @returns {string} The input with a filler surrogate next to each lone one
 */
const fixSurrogates = string =>
{
	const blankHigh = String.fromCharCode(0xD800);
	const blankLow = String.fromCharCode(0xDC00);
	const pieces = [];
	let index = 0;

	while (index < string.length)
	{
		const unit = string[index];

		if (isHighSurrogate(unit))
		{
			const hasNext = index + 1 < string.length;

			if (hasNext && isLowSurrogate(string[index + 1]))
			{
				// Complete pair: copy both units and step over the low half,
				// so it is never mistaken for a lone low surrogate.
				pieces.push(unit, string[index + 1]);
				index += 2;
				continue;
			}

			pieces.push(unit, blankLow);
		}
		else if (isLowSurrogate(unit))
		{
			// Low halves of complete pairs are consumed above, so any low
			// surrogate that reaches this branch has no high partner.
			pieces.push(blankHigh, unit);
		}
		else
		{
			pieces.push(unit);
		}

		index += 1;
	}

	return pieces.join('');
};
54+
55+
/**
 * Remove the blank high/low surrogates added by `fixSurrogates`.
 *
 * Every U+D800 and U+DC00 code unit is dropped, including ones that belong
 * to a genuine pair (for example U+10000, which is encoded as U+D800 U+DC00).
 * @param {string} string The input string
 * @returns {string} The input without any U+D800 / U+DC00 code units
 */
const unfixSurrogates = string =>
	// List the two code units directly: inside a character class `|` is a
	// literal, so using it as a separator would also strip pipe characters.
	string.replace(/[\uD800\uDC00]/g, '');
62+
63+
export default {
64+
fixSurrogates,
65+
unfixSurrogates
66+
};
67+
68+
export {
69+
fixSurrogates,
70+
unfixSurrogates
71+
};

template-readme.hbs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ $ string-mutilator --help
112112
shift
113113
shiftBits
114114
toMANS
115+
unicode.fixSurrogates
116+
unicode.unfixSurrogates
115117

116118
Examples
117119
$ string-mutilator shift "Hello World!" 6

test/unicode.js

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import test from 'ava';
2+
import unicode from '../src/unicode.js';
3+
4+
/**
 * Build a string holding the 1023 code units that follow `filler` in its
 * 1024-wide surrogate range. `filler` itself (U+D800 or U+DC00) is left out
 * because it is used as the filler surrogate.
 * @param {number} filler First code point of the surrogate range
 * @returns {string}
 */
const surrogatesAfter = filler =>
{
	let units = '';

	for (let code = filler + 1; code < filler + 1024; code++)
	{
		units += String.fromCodePoint(code);
	}

	return units;
};

const highs = surrogatesAfter(0xD800);
const lows = surrogatesAfter(0xDC00);

// Each lone high surrogate should gain exactly one filler, doubling the length.
const fixedHighs = unicode.fixSurrogates(highs);
const unfixedHighs = unicode.unfixSurrogates(fixedHighs);

test('High surrogates fixed length ', t =>
{
	t.is(fixedHighs.length, highs.length * 2);
});

test('High surrogates unfixed length', t =>
{
	t.is(unfixedHighs.length, highs.length);
});

// The same expectation holds for lone low surrogates.
const fixedLows = unicode.fixSurrogates(lows);
const unfixedLows = unicode.unfixSurrogates(fixedLows);

test('Low surrogates fixed length', t =>
{
	t.is(fixedLows.length, lows.length * 2);
});

test('Low surrogates unfixed length', t =>
{
	t.is(unfixedLows.length, lows.length);
});

// TODO: Add more detailed tests if necessary

0 commit comments

Comments
 (0)