// char-class-to-meta-transform.js
  /**
   * The MIT License (MIT)
   * Copyright (c) 2017-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
   */
  'use strict';
  /**
   * A regexp-tree plugin that replaces standard character classes with
   * their equivalent meta symbols, e.g. [0-9] -> \d.
   */
  /**
   * Returns a new array holding the elements of `arr`.
   *
   * Real arrays are copied index by index (holes become `undefined`
   * entries); any other value is converted with `Array.from`.
   *
   * @param {*} arr - An array, or any value `Array.from` accepts.
   * @returns {Array} A fresh array; never the same object as `arr`.
   */
  function _toConsumableArray(arr) {
    if (!Array.isArray(arr)) {
      return Array.from(arr);
    }

    var copy = Array(arr.length);
    for (var index = 0; index < arr.length; index++) {
      copy[index] = arr[index];
    }
    return copy;
  }
  11. module.exports = {
  12. _hasIFlag: false,
  13. _hasUFlag: false,
  14. init: function init(ast) {
  15. this._hasIFlag = ast.flags.includes('i');
  16. this._hasUFlag = ast.flags.includes('u');
  17. },
  18. CharacterClass: function CharacterClass(path) {
  19. // [0-9] -> \d
  20. rewriteNumberRanges(path);
  21. // [a-zA-Z_0-9] -> \w
  22. rewriteWordRanges(path, this._hasIFlag, this._hasUFlag);
  23. // [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] -> \s
  24. rewriteWhitespaceRanges(path);
  25. }
  26. };
  /**
   * Rewrites number ranges: [0-9] -> \d
   *
   * Each child of the character class that is a full `0-9` range is
   * replaced, through its child path, with the `\d` meta character.
   * Other children are left untouched.
   *
   * @param {Object} path - Path of a character class; must expose
   *   `node.expressions` and `getChild(index)`.
   */
  function rewriteNumberRanges(path) {
    var expressions = path.node.expressions;

    expressions.forEach(function (expression, index) {
      if (!isFullNumberRange(expression)) {
        return;
      }

      path.getChild(index).replace({
        type: 'Char',
        value: '\\d',
        kind: 'meta'
      });
    });
  }
  /**
   * Rewrites word ranges: [a-zA-Z_0-9] -> \w
   *
   * The parts may appear in any order, and other symbols/ranges are kept
   * untouched, e.g. [a-z_\dA-Z$] -> [\w$]. Digits are only recognised as
   * the `\d` meta character.
   *
   * Flag handling:
   * - with the `i` flag, one of `a-z` / `A-Z` is enough;
   * - with both `i` and `u`, the class must also contain the code points
   *   U+017F and U+212A.
   *
   * @param {Object} path - Path of a character class; must expose
   *   `node.expressions` and `getChild(index)`.
   * @param {boolean} hasIFlag - Whether the regexp has the `i` flag.
   * @param {boolean} hasUFlag - Whether the regexp has the `u` flag.
   */
  function rewriteWordRanges(path, hasIFlag, hasUFlag) {
    var expressions = path.node.expressions;
    var needsExtraCodePoints = hasIFlag && hasUFlag;

    var numberPath = null;
    var lowerCasePath = null;
    var upperCasePath = null;
    var underscorePath = null;
    var u017fPath = null;
    var u212aPath = null;

    expressions.forEach(function (expression, index) {
      if (isMetaChar(expression, '\\d')) {
        // \d
        numberPath = path.getChild(index);
      } else if (isLowerCaseRange(expression)) {
        // a-z
        lowerCasePath = path.getChild(index);
      } else if (isUpperCaseRange(expression)) {
        // A-Z
        upperCasePath = path.getChild(index);
      } else if (isUnderscore(expression)) {
        // _
        underscorePath = path.getChild(index);
      } else if (needsExtraCodePoints && isCodePoint(expression, 0x017f)) {
        u017fPath = path.getChild(index);
      } else if (needsExtraCodePoints && isCodePoint(expression, 0x212a)) {
        u212aPath = path.getChild(index);
      }
    });

    var hasLetters = hasIFlag
      ? lowerCasePath || upperCasePath
      : lowerCasePath && upperCasePath;
    var hasExtraCodePoints = !needsExtraCodePoints || (u017fPath && u212aPath);

    // Bail out unless the whole pattern was found.
    if (!(numberPath && hasLetters && underscorePath && hasExtraCodePoints)) {
      return;
    }

    // Put \w in place of \d.
    numberPath.replace({
      type: 'Char',
      value: '\\w',
      kind: 'meta'
    });

    // The remaining parts are now covered by \w, so they are removed.
    [lowerCasePath, upperCasePath, underscorePath, u017fPath, u212aPath].forEach(
      function (partPath) {
        if (partPath) {
          partPath.remove();
        }
      }
    );
  }
  /**
   * Predicates used by the whitespace rewrite:
   * [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] -> \s.
   *
   * One predicate per member of that class, in this order: the literal
   * space, the five meta characters, the eight single code points and the
   * \u2000-\u200a range. Each takes a class child node and returns a boolean.
   */
  var whitespaceRangeTests = [function (node) {
    return isChar(node, ' ');
  }].concat(
    ['\\f', '\\n', '\\r', '\\t', '\\v'].map(function (metaValue) {
      return function (node) {
        return isMetaChar(node, metaValue);
      };
    }),
    [0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, 0xfeff].map(function (codePoint) {
      return function (node) {
        return isCodePoint(node, codePoint);
      };
    }),
    [function (node) {
      return node.type === 'ClassRange' && isCodePoint(node.from, 0x2000) && isCodePoint(node.to, 0x200a);
    }]
  );
  /**
   * Rewrites whitespace ranges:
   * [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] -> \s.
   *
   * Nothing happens unless every predicate in `whitespaceRangeTests` is
   * satisfied by some child of the class. When they all are, the first
   * `\n` child is turned into `\s` in place and every other child matching
   * a predicate is removed; unrelated children are kept.
   *
   * @param {Object} path - Path of a character class; must expose
   *   `node.expressions` and `getChild(index)`.
   */
  function rewriteWhitespaceRanges(path) {
    var expressions = path.node.expressions;

    // Cheap reject: fewer children than predicates can never match them all.
    if (expressions.length < whitespaceRangeTests.length) {
      return;
    }

    var hasWholePattern = whitespaceRangeTests.every(function (test) {
      return expressions.some(function (expression) {
        return test(expression);
      });
    });
    if (!hasWholePattern) {
      return;
    }

    // Put \s in place of \n.
    var newlineNode = expressions.find(function (expression) {
      return isMetaChar(expression, '\\n');
    });
    newlineNode.value = '\\s';
    newlineNode.symbol = undefined;
    newlineNode.codePoint = NaN;

    // Collect every child path first, then remove: the rewritten node no
    // longer matches the `\n` predicate, so it is not collected.
    var obsoletePaths = [];
    expressions.forEach(function (expression, index) {
      var isWhitespaceMember = whitespaceRangeTests.some(function (test) {
        return test(expression);
      });
      if (!isWhitespaceMember) {
        return;
      }
      var childPath = path.getChild(index);
      if (childPath) {
        obsoletePaths.push(childPath);
      }
    });

    obsoletePaths.forEach(function (childPath) {
      childPath.remove();
    });
  }
  /**
   * Tells whether `node` is the class range `0-9`.
   *
   * @param {Object} node - A character class child node.
   * @returns {boolean}
   */
  function isFullNumberRange(node) {
    if (node.type !== 'ClassRange') {
      return false;
    }
    return node.from.value === '0' && node.to.value === '9';
  }
  /**
   * Tells whether `node` is a `Char` node with the given value and kind.
   *
   * An optional third argument selects the kind to match; when it is
   * omitted or `undefined`, the kind defaults to 'simple'.
   *
   * @param {Object} node - A character class child node.
   * @param {string} value - Expected `node.value`.
   * @returns {boolean}
   */
  function isChar(node, value) {
    var requestedKind = arguments[2];
    var kind = requestedKind === undefined ? 'simple' : requestedKind;

    return node.type === 'Char' && node.value === value && node.kind === kind;
  }
  /**
   * Tells whether `node` is a `Char` node of kind 'meta' with the given
   * value, e.g. `isMetaChar(node, '\\d')`.
   *
   * @param {Object} node - A character class child node.
   * @param {string} value - Expected `node.value`.
   * @returns {boolean}
   */
  function isMetaChar(node, value) {
    return isChar(node, value, 'meta');
  }
  /**
   * Tells whether `node` is the class range `a-z`.
   *
   * @param {Object} node - A character class child node.
   * @returns {boolean}
   */
  function isLowerCaseRange(node) {
    if (node.type !== 'ClassRange') {
      return false;
    }
    return node.from.value === 'a' && node.to.value === 'z';
  }
  /**
   * Tells whether `node` is the class range `A-Z`.
   *
   * @param {Object} node - A character class child node.
   * @returns {boolean}
   */
  function isUpperCaseRange(node) {
    if (node.type !== 'ClassRange') {
      return false;
    }
    return node.from.value === 'A' && node.to.value === 'Z';
  }
  /**
   * Tells whether `node` is the simple character `_`.
   *
   * @param {Object} node - A character class child node.
   * @returns {boolean}
   */
  function isUnderscore(node) {
    if (node.type !== 'Char') {
      return false;
    }
    return node.value === '_' && node.kind === 'simple';
  }
  /**
   * Tells whether `node` is a `Char` node of kind 'unicode' whose
   * `codePoint` equals the given number.
   *
   * @param {Object} node - A character class child node.
   * @param {number} codePoint - Expected `node.codePoint`.
   * @returns {boolean}
   */
  function isCodePoint(node, codePoint) {
    if (node.type !== 'Char' || node.kind !== 'unicode') {
      return false;
    }
    return node.codePoint === codePoint;
  }