@@ -15,7 +15,7 @@ var d3 = require('d3');
1515
1616var Lib = require ( '../lib' ) ;
1717var xmlnsNamespaces = require ( '../constants/xmlns_namespaces' ) ;
18- var stringMappings = require ( '../constants/string_mappings' ) ;
18+ var entityToUnicode = require ( '../constants/string_mappings' ) . entityToUnicode ;
1919var LINE_SPACING = require ( '../constants/alignment' ) . LINE_SPACING ;
2020
2121// text converter
@@ -223,13 +223,6 @@ var PROTOCOLS = ['http:', 'https:', 'mailto:', '', undefined, ':'];
223223
224224var STRIP_TAGS = new RegExp ( '</?(' + Object . keys ( TAG_STYLES ) . join ( '|' ) + ')( [^>]*)?/?>' , 'g' ) ;
225225
226- var ENTITY_TO_UNICODE = Object . keys ( stringMappings . entityToUnicode ) . map ( function ( k ) {
227- return {
228- regExp : new RegExp ( '&' + k + ';' , 'g' ) ,
229- sub : stringMappings . entityToUnicode [ k ]
230- } ;
231- } ) ;
232-
233226var NEWLINES = / ( \r \n ? | \n ) / g;
234227
235228var SPLIT_TAGS = / ( < [ ^ < > ] * > ) / ;
@@ -254,6 +247,14 @@ var BR_TAG = /<br(\s+.*)?>/i;
254247 *
255248 * Because we hack in other attributes with style (sub & sup), drop any trailing
256249 * semicolon in user-supplied styles so we can consistently append the tag-dependent style
250+ *
251+ * These are for tag attributes; Chrome will convert entities in attribute
252+ * values anyway, but not in attribute names.
253+ * You can test this with, for example:
254+ * > p = document.createElement('p')
255+ * > p.innerHTML = '<span style="font-color:red;">Hi</span>'
256+ * > p.innerHTML
257+ * <- '<span style="font-color:red;">Hi</span>'
257258 */
258259var STYLEMATCH = / ( ^ | [ \s " ' ] ) s t y l e \s * = \s * ( " ( [ ^ " ] * ) ; ? " | ' ( [ ^ ' ] * ) ; ? ' ) / i;
259260var HREFMATCH = / ( ^ | [ \s " ' ] ) h r e f \s * = \s * ( " ( [ ^ " ] * ) " | ' ( [ ^ ' ] * ) ' ) / i;
@@ -265,7 +266,8 @@ var POPUPMATCH = /(^|[\s"'])popup\s*=\s*("([\w=,]*)"|'([\w=,]*)')/i;
/**
 * Extract the quoted attribute value captured by one of the attribute regexes
 * and decode any HTML entities it contains.
 *
 * The regex is expected to capture the double-quoted value in group 3 and the
 * single-quoted value in group 4.
 *
 * @param {string} _str - raw attribute text of a tag
 * @param {RegExp} re - attribute matcher
 * @returns {?string} the decoded value; null when `_str` is falsy or `re`
 *   does not match; the falsy captured value itself ('' or undefined) when
 *   nothing was quoted
 */
function getQuotedMatch(_str, re) {
    if(!_str) return null;

    var groups = _str.match(re);
    // no match at all: hand back the null that String#match produced
    if(!groups) return groups;

    var quoted = groups[3] || groups[4];
    // an empty or missing value is returned untouched, there is nothing to decode
    if(!quoted) return quoted;

    return convertEntities(quoted);
}
270272
271273var COLORMATCH = / ( ^ | ; ) \s * c o l o r : / ;
@@ -276,19 +278,45 @@ exports.plainText = function(_str) {
276278 return ( _str || '' ) . replace ( STRIP_TAGS , ' ' ) ;
277279} ;
278280
279- function replaceFromMapObject ( _str , list ) {
280- if ( ! _str ) return '' ;
281-
282- for ( var i = 0 ; i < list . length ; i ++ ) {
283- var item = list [ i ] ;
284- _str = _str . replace ( item . regExp , item . sub ) ;
285- }
// NOTE: in general entities can contain uppercase too (so [a-zA-Z]) but all the
// ones we support use only lowercase. If we ever change that, update the regex.
var ENTITY_MATCH = /&(#\d+|#x[\da-fA-F]+|[a-z]+);/g;

/**
 * Replace HTML character references in a string with the characters they
 * stand for.
 *
 * Handles decimal (`&#65;`) and hexadecimal (`&#x41;`) numeric references as
 * well as the named entities listed in `entityToUnicode`. As in regular HTML,
 * anything that cannot be decoded is left in place as raw text.
 *
 * @param {string} _str - text that may contain entities
 * @returns {string} the text with every decodable entity replaced; '' when
 *   `_str` is falsy
 */
function convertEntities(_str) {
    // keep the falsy-input contract of the previous implementation
    // rather than throwing on null / undefined
    if(!_str) return '';

    return _str.replace(ENTITY_MATCH, function(fullMatch, innerMatch) {
        var outChar;
        if(innerMatch.charAt(0) === '#') {
            // cannot use String.fromCodePoint in IE
            outChar = fromCodePoint(
                innerMatch.charAt(1) === 'x' ?
                    parseInt(innerMatch.substr(2), 16) :
                    parseInt(innerMatch.substr(1), 10)
            );
        }
        // own properties only: a plain lookup would also find inherited
        // members, so '&constructor;' would turn into the source text of
        // the Object function
        else if(Object.prototype.hasOwnProperty.call(entityToUnicode, innerMatch)) {
            outChar = entityToUnicode[innerMatch];
        }

        // as in regular HTML, if we didn't decode the entity just
        // leave the raw text in place.
        return outChar || fullMatch;
    });
}
289-
290- function convertEntities ( _str ) {
291- return replaceFromMapObject ( _str , ENTITY_TO_UNICODE ) ;
302+ exports . convertEntities = convertEntities ;
303+
// String.fromCodePoint does not exist in IE, so hold on to both native
// converters and build surrogate pairs by hand when it is missing.
var stringFromCodePoint = String.fromCodePoint;
var stringFromCharCode = String.fromCharCode;

/**
 * Convert a Unicode code point to a string, on browsers with or without
 * String.fromCodePoint.
 *
 * @param {number} code - the code point
 * @returns {string|undefined} the character (a surrogate pair above 0xFFFF),
 *   or undefined when `code` is not an integer in [0, 0x10FFFF]
 */
function fromCodePoint(code) {
    // Don't allow overflow. In Chrome this turns into the replacement
    // character but it is more useful to just not convert it at all.
    // NaN, negative and fractional values are rejected here as well: the
    // native function throws a RangeError for them while the fallback below
    // would quietly emit a garbage character, so bail out before either path.
    if(!(code >= 0 && code <= 0x10FFFF) || code % 1 !== 0) return;

    if(stringFromCodePoint) return stringFromCodePoint(code);

    // IE doesn't have String.fromCodePoint
    // see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint
    if(code <= 0xFFFF) return stringFromCharCode(code);

    // encode as a UTF-16 surrogate pair; 0xD7C0 is 0xD800 - (0x10000 >> 10)
    return stringFromCharCode(
        (code >> 10) + 0xD7C0,
        (code % 0x400) + 0xDC00
    );
}
293321
294322/*
@@ -302,15 +330,14 @@ function convertEntities(_str) {
302330 * somewhat differently if it does, so just keep track of this when it happens.
303331 */
304332function buildSVGText ( containerNode , str ) {
305- str = convertEntities ( str )
306- /*
307- * Normalize behavior between IE and others wrt newlines and whitespace:pre
308- * this combination makes IE barf https://github.com/plotly/plotly.js/issues/746
309- * Chrome and FF display \n, \r, or \r\n as a space in this mode.
310- * I feel like at some point we turned these into <br> but currently we don't so
311- * I'm just going to cement what we do now in Chrome and FF
312- */
313- . replace ( NEWLINES , ' ' ) ;
333+ /*
334+ * Normalize behavior between IE and others wrt newlines and whitespace:pre
335+ * this combination makes IE barf https://github.com/plotly/plotly.js/issues/746
336+ * Chrome and FF display \n, \r, or \r\n as a space in this mode.
337+ * I feel like at some point we turned these into <br> but currently we don't so
338+ * I'm just going to cement what we do now in Chrome and FF
339+ */
340+ str = str . replace ( NEWLINES , ' ' ) ;
314341
315342 var hasLink = false ;
316343
@@ -435,7 +462,7 @@ function buildSVGText(containerNode, str) {
435462 newLine ( ) ;
436463 }
437464 else if ( tagStyle === undefined ) {
438- addTextNode ( currentNode , parti ) ;
465+ addTextNode ( currentNode , convertEntities ( parti ) ) ;
439466 }
440467 else {
441468 // tag - open or close
0 commit comments