char-case-insensitive-lowercase-transform.js 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. /**
  2. * The MIT License (MIT)
  3. * Copyright (c) 2017-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4. */
  5. 'use strict';
  6. var UPPER_A_CP = 'A'.codePointAt(0);
  7. var UPPER_Z_CP = 'Z'.codePointAt(0);
  8. /**
  9. * Transforms case-insensitive regexp to lowercase
  10. *
  11. * /AaBbÏ/i -> /aabbï/i
  12. */
  13. module.exports = {
  14. _AZClassRanges: null,
  15. _hasUFlag: false,
  16. init: function init(ast) {
  17. this._AZClassRanges = new Set();
  18. this._hasUFlag = ast.flags.includes('u');
  19. },
  20. shouldRun: function shouldRun(ast) {
  21. return ast.flags.includes('i');
  22. },
  23. Char: function Char(path) {
  24. var node = path.node,
  25. parent = path.parent;
  26. if (isNaN(node.codePoint)) {
  27. return;
  28. }
  29. // Engine support for case-insensitive matching without the u flag
  30. // for characters above \u1000 does not seem reliable.
  31. if (!this._hasUFlag && node.codePoint >= 0x1000) {
  32. return;
  33. }
  34. if (parent.type === 'ClassRange') {
  35. // The only class ranges we handle must be inside A-Z.
  36. // After the `from` char is processed, the isAZClassRange test
  37. // will be false, so we use a Set to keep track of parents and
  38. // process the `to` char.
  39. if (!this._AZClassRanges.has(parent) && !isAZClassRange(parent)) {
  40. return;
  41. }
  42. this._AZClassRanges.add(parent);
  43. }
  44. var lower = node.symbol.toLowerCase();
  45. if (lower !== node.symbol) {
  46. node.value = displaySymbolAsValue(lower, node);
  47. node.symbol = lower;
  48. node.codePoint = lower.codePointAt(0);
  49. }
  50. }
  51. };
  52. function isAZClassRange(classRange) {
  53. var from = classRange.from,
  54. to = classRange.to;
  55. // A-Z
  56. return from.codePoint >= UPPER_A_CP && from.codePoint <= UPPER_Z_CP && to.codePoint >= UPPER_A_CP && to.codePoint <= UPPER_Z_CP;
  57. }
  58. function displaySymbolAsValue(symbol, node) {
  59. var codePoint = symbol.codePointAt(0);
  60. if (node.kind === 'decimal') {
  61. return '\\' + codePoint;
  62. }
  63. if (node.kind === 'oct') {
  64. return '\\0' + codePoint.toString(8);
  65. }
  66. if (node.kind === 'hex') {
  67. return '\\x' + codePoint.toString(16);
  68. }
  69. if (node.kind === 'unicode') {
  70. if (node.isSurrogatePair) {
  71. var _getSurrogatePairFrom = getSurrogatePairFromCodePoint(codePoint),
  72. lead = _getSurrogatePairFrom.lead,
  73. trail = _getSurrogatePairFrom.trail;
  74. return '\\u' + '0'.repeat(4 - lead.length) + lead + '\\u' + '0'.repeat(4 - trail.length) + trail;
  75. } else if (node.value.includes('{')) {
  76. return '\\u{' + codePoint.toString(16) + '}';
  77. } else {
  78. var code = codePoint.toString(16);
  79. return '\\u' + '0'.repeat(4 - code.length) + code;
  80. }
  81. }
  82. // simple
  83. return symbol;
  84. }
  85. /**
  86. * Converts a code point to a surrogate pair.
  87. * Conversion algorithm is taken from The Unicode Standard 3.0 Section 3.7
  88. * (https://www.unicode.org/versions/Unicode3.0.0/ch03.pdf)
  89. * @param {number} codePoint - Between 0x10000 and 0x10ffff
  90. * @returns {{lead: string, trail: string}}
  91. */
  92. function getSurrogatePairFromCodePoint(codePoint) {
  93. var lead = Math.floor((codePoint - 0x10000) / 0x400) + 0xd800;
  94. var trail = (codePoint - 0x10000) % 0x400 + 0xdc00;
  95. return {
  96. lead: lead.toString(16),
  97. trail: trail.toString(16)
  98. };
  99. }