@@ -15,7 +15,6 @@ var d3 = require('d3');
15
15
16
16
var Lib = require ( '../lib' ) ;
17
17
var xmlnsNamespaces = require ( '../constants/xmlns_namespaces' ) ;
18
- var entityToUnicode = require ( '../constants/string_mappings' ) . entityToUnicode ;
19
18
var LINE_SPACING = require ( '../constants/alignment' ) . LINE_SPACING ;
20
19
21
20
// text converter
@@ -278,6 +277,32 @@ exports.plainText = function(_str) {
278
277
return ( _str || '' ) . replace ( STRIP_TAGS , ' ' ) ;
279
278
} ;
280
279
280
+ /*
281
+ * N.B. HTML entities are listed without the leading '&' and trailing ';'
282
+ * https://www.freeformatter.com/html-entities.html
283
+ *
284
+ * FWIW if we wanted to support the full set, it has 2261 entries:
285
+ * https://www.w3.org/TR/html5/entities.json
286
+ * though I notice that some of these are duplicates and/or are missing ";"
287
+ * eg: "&amp;", "&amp", "&AMP;", and "&AMP" all map to "&"
288
+ * We no longer need to include numeric entities here, these are now handled
289
+ * by String.fromCodePoint/fromCharCode
290
+ *
291
+ * Anyway the only ones that are really important to allow are the HTML special
292
+ * chars <, >, and &, because these ones can trigger special processing if not
293
+ * replaced by the corresponding entity.
294
+ */
295
/*
 * Lookup table from named HTML entity to the character it stands for.
 * Keys are the entity names without the leading '&' and trailing ';'.
 */
var entityToUnicode = {
    mu: 'μ',
    amp: '&',
    lt: '<',
    gt: '>',
    // Written as an escape on purpose: a literal U+00A0 in source is
    // indistinguishable from an ordinary space and is easily lost in editing.
    nbsp: '\u00a0',
    times: '×',
    plusmn: '±',
    deg: '°'
};
305
+
281
306
// NOTE: in general entities can contain uppercase too (so [a-zA-Z]) but all the
282
307
// ones we support use only lowercase. If we ever change that, update the regex.
283
308
// Matches a single '&...;' entity. Capture group 1 is the text between the
// '&' and the ';': decimal ('#38'), hexadecimal ('#x26'), or a lowercase
// entity name ('amp').
var ENTITY_MATCH = /&(#\d+|#x[\da-fA-F]+|[a-z]+);/g;
0 commit comments