encode.js 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. "use strict";
  2. var __importDefault = (this && this.__importDefault) || function (mod) {
  3. return (mod && mod.__esModule) ? mod : { "default": mod };
  4. };
  5. Object.defineProperty(exports, "__esModule", { value: true });
  6. exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
// XML entity table loaded from ./maps/xml.json (presumably entity name -> text;
// that is how getInverseObj below reads it).
var xml_json_1 = __importDefault(require("./maps/xml.json"));
// Reverse lookup for the XML table. getInverseObj and getInverseReplacer are
// function declarations further down in this file, so they are hoisted and
// callable here.
var inverseXML = getInverseObj(xml_json_1.default);
// Global regex matching every key of inverseXML.
var xmlReplacer = getInverseReplacer(inverseXML);
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using XML entities.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xFC;`) will be used.
 */
exports.encodeXML = getASCIIEncoder(inverseXML);
// HTML entity table loaded from ./maps/entities.json (presumably entity name -> text;
// that is how getInverseObj reads it).
var entities_json_1 = __importDefault(require("./maps/entities.json"));
// Reverse lookup for the HTML table: mapped text -> "&name;".
var inverseHTML = getInverseObj(entities_json_1.default);
// Global regex matching every key of inverseHTML.
var htmlReplacer = getInverseReplacer(inverseHTML);
/**
 * Encodes all entities and non-ASCII characters in the input.
 *
 * This includes characters that are valid ASCII characters in HTML documents.
 * For example `#` will be encoded as `&num;`. To get a more compact output,
 * consider using the `encodeNonAsciiHTML` function.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xFC;`) will be used.
 */
exports.encodeHTML = getInverse(inverseHTML, htmlReplacer);
/**
 * Encodes all non-ASCII characters, as well as characters not valid in HTML
 * documents using HTML entities.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xFC;`) will be used.
 */
exports.encodeNonAsciiHTML = getASCIIEncoder(inverseHTML);
  40. function getInverseObj(obj) {
  41. return Object.keys(obj)
  42. .sort()
  43. .reduce(function (inverse, name) {
  44. inverse[obj[name]] = "&" + name + ";";
  45. return inverse;
  46. }, {});
  47. }
  48. function getInverseReplacer(inverse) {
  49. var single = [];
  50. var multiple = [];
  51. for (var _i = 0, _a = Object.keys(inverse); _i < _a.length; _i++) {
  52. var k = _a[_i];
  53. if (k.length === 1) {
  54. // Add value to single array
  55. single.push("\\" + k);
  56. }
  57. else {
  58. // Add value to multiple array
  59. multiple.push(k);
  60. }
  61. }
  62. // Add ranges to single characters.
  63. single.sort();
  64. for (var start = 0; start < single.length - 1; start++) {
  65. // Find the end of a run of characters
  66. var end = start;
  67. while (end < single.length - 1 &&
  68. single[end].charCodeAt(1) + 1 === single[end + 1].charCodeAt(1)) {
  69. end += 1;
  70. }
  71. var count = 1 + end - start;
  72. // We want to replace at least three characters
  73. if (count < 3)
  74. continue;
  75. single.splice(start, count, single[start] + "-" + single[end]);
  76. }
  77. multiple.unshift("[" + single.join("") + "]");
  78. return new RegExp(multiple.join("|"), "g");
  79. }
  80. // /[^\0-\x7F]/gu
  81. var reNonASCII = /(?:[\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g;
  82. var getCodePoint =
  83. // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  84. String.prototype.codePointAt != null
  85. ? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
  86. function (str) { return str.codePointAt(0); }
  87. : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  88. function (c) {
  89. return (c.charCodeAt(0) - 0xd800) * 0x400 +
  90. c.charCodeAt(1) -
  91. 0xdc00 +
  92. 0x10000;
  93. };
  94. function singleCharReplacer(c) {
  95. return "&#x" + (c.length > 1 ? getCodePoint(c) : c.charCodeAt(0))
  96. .toString(16)
  97. .toUpperCase() + ";";
  98. }
  99. function getInverse(inverse, re) {
  100. return function (data) {
  101. return data
  102. .replace(re, function (name) { return inverse[name]; })
  103. .replace(reNonASCII, singleCharReplacer);
  104. };
  105. }
// Matches either a key of the XML inverse table or a non-ASCII character.
// Built by joining the two pattern sources, so the XML alternatives are tried first.
var reEscapeChars = new RegExp(xmlReplacer.source + "|" + reNonASCII.source, "g");
  107. /**
  108. * Encodes all non-ASCII characters, as well as characters not valid in XML
  109. * documents using numeric hexadecimal reference (eg. `&#xfc;`).
  110. *
  111. * Have a look at `escapeUTF8` if you want a more concise output at the expense
  112. * of reduced transportability.
  113. *
  114. * @param data String to escape.
  115. */
  116. function escape(data) {
  117. return data.replace(reEscapeChars, singleCharReplacer);
  118. }
  119. exports.escape = escape;
  120. /**
  121. * Encodes all characters not valid in XML documents using numeric hexadecimal
  122. * reference (eg. `&#xfc;`).
  123. *
  124. * Note that the output will be character-set dependent.
  125. *
  126. * @param data String to escape.
  127. */
  128. function escapeUTF8(data) {
  129. return data.replace(xmlReplacer, singleCharReplacer);
  130. }
  131. exports.escapeUTF8 = escapeUTF8;
  132. function getASCIIEncoder(obj) {
  133. return function (data) {
  134. return data.replace(reEscapeChars, function (c) { return obj[c] || singleCharReplacer(c); });
  135. };
  136. }