parseUtils.js 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. "use strict";
  2. const fs = require('fs');
  3. const _ = require('lodash');
  4. const acorn = require('acorn');
  5. const walk = require('acorn-walk');
  6. module.exports = {
  7. parseBundle
  8. };
  9. function parseBundle(bundlePath) {
  10. const content = fs.readFileSync(bundlePath, 'utf8');
  11. const ast = acorn.parse(content, {
  12. sourceType: 'script',
  13. // I believe in a bright future of ECMAScript!
  14. // Actually, it's set to `2050` to support the latest ECMAScript version that currently exists.
  15. // Seems like `acorn` supports such weird option value.
  16. ecmaVersion: 2050
  17. });
  18. const walkState = {
  19. locations: null,
  20. expressionStatementDepth: 0
  21. };
  22. walk.recursive(ast, walkState, {
  23. ExpressionStatement(node, state, c) {
  24. if (state.locations) return;
  25. state.expressionStatementDepth++;
  26. if ( // Webpack 5 stores modules in the the top-level IIFE
  27. state.expressionStatementDepth === 1 && ast.body.includes(node) && isIIFE(node)) {
  28. const fn = getIIFECallExpression(node);
  29. if ( // It should not contain neither arguments
  30. fn.arguments.length === 0 && // ...nor parameters
  31. fn.callee.params.length === 0) {
  32. // Modules are stored in the very first variable declaration as hash
  33. const firstVariableDeclaration = fn.callee.body.body.find(node => node.type === 'VariableDeclaration');
  34. if (firstVariableDeclaration) {
  35. for (const declaration of firstVariableDeclaration.declarations) {
  36. if (declaration.init) {
  37. state.locations = getModulesLocations(declaration.init);
  38. if (state.locations) {
  39. break;
  40. }
  41. }
  42. }
  43. }
  44. }
  45. }
  46. if (!state.locations) {
  47. c(node.expression, state);
  48. }
  49. state.expressionStatementDepth--;
  50. },
  51. AssignmentExpression(node, state) {
  52. if (state.locations) return; // Modules are stored in exports.modules:
  53. // exports.modules = {};
  54. const {
  55. left,
  56. right
  57. } = node;
  58. if (left && left.object && left.object.name === 'exports' && left.property && left.property.name === 'modules' && isModulesHash(right)) {
  59. state.locations = getModulesLocations(right);
  60. }
  61. },
  62. CallExpression(node, state, c) {
  63. if (state.locations) return;
  64. const args = node.arguments; // Main chunk with webpack loader.
  65. // Modules are stored in first argument:
  66. // (function (...) {...})(<modules>)
  67. if (node.callee.type === 'FunctionExpression' && !node.callee.id && args.length === 1 && isSimpleModulesList(args[0])) {
  68. state.locations = getModulesLocations(args[0]);
  69. return;
  70. } // Async Webpack < v4 chunk without webpack loader.
  71. // webpackJsonp([<chunks>], <modules>, ...)
  72. // As function name may be changed with `output.jsonpFunction` option we can't rely on it's default name.
  73. if (node.callee.type === 'Identifier' && mayBeAsyncChunkArguments(args) && isModulesList(args[1])) {
  74. state.locations = getModulesLocations(args[1]);
  75. return;
  76. } // Async Webpack v4 chunk without webpack loader.
  77. // (window.webpackJsonp=window.webpackJsonp||[]).push([[<chunks>], <modules>, ...]);
  78. // As function name may be changed with `output.jsonpFunction` option we can't rely on it's default name.
  79. if (isAsyncChunkPushExpression(node)) {
  80. state.locations = getModulesLocations(args[0].elements[1]);
  81. return;
  82. } // Webpack v4 WebWorkerChunkTemplatePlugin
  83. // globalObject.chunkCallbackName([<chunks>],<modules>, ...);
  84. // Both globalObject and chunkCallbackName can be changed through the config, so we can't check them.
  85. if (isAsyncWebWorkerChunkExpression(node)) {
  86. state.locations = getModulesLocations(args[1]);
  87. return;
  88. } // Walking into arguments because some of plugins (e.g. `DedupePlugin`) or some Webpack
  89. // features (e.g. `umd` library output) can wrap modules list into additional IIFE.
  90. args.forEach(arg => c(arg, state));
  91. }
  92. });
  93. let modules;
  94. if (walkState.locations) {
  95. modules = _.mapValues(walkState.locations, loc => content.slice(loc.start, loc.end));
  96. } else {
  97. modules = {};
  98. }
  99. return {
  100. modules,
  101. src: content,
  102. runtimeSrc: getBundleRuntime(content, walkState.locations)
  103. };
  104. }
  105. /**
  106. * Returns bundle source except modules
  107. */
  108. function getBundleRuntime(content, modulesLocations) {
  109. const sortedLocations = Object.values(modulesLocations || {}).sort((a, b) => a.start - b.start);
  110. let result = '';
  111. let lastIndex = 0;
  112. for (const {
  113. start,
  114. end
  115. } of sortedLocations) {
  116. result += content.slice(lastIndex, start);
  117. lastIndex = end;
  118. }
  119. return result + content.slice(lastIndex, content.length);
  120. }
  121. function isIIFE(node) {
  122. return node.type === 'ExpressionStatement' && (node.expression.type === 'CallExpression' || node.expression.type === 'UnaryExpression' && node.expression.argument.type === 'CallExpression');
  123. }
  124. function getIIFECallExpression(node) {
  125. if (node.expression.type === 'UnaryExpression') {
  126. return node.expression.argument;
  127. } else {
  128. return node.expression;
  129. }
  130. }
  131. function isModulesList(node) {
  132. return isSimpleModulesList(node) || // Modules are contained in expression `Array([minimum ID]).concat([<module>, <module>, ...])`
  133. isOptimizedModulesArray(node);
  134. }
  135. function isSimpleModulesList(node) {
  136. return (// Modules are contained in hash. Keys are module ids.
  137. isModulesHash(node) || // Modules are contained in array. Indexes are module ids.
  138. isModulesArray(node)
  139. );
  140. }
  141. function isModulesHash(node) {
  142. return node.type === 'ObjectExpression' && node.properties.map(node => node.value).every(isModuleWrapper);
  143. }
  144. function isModulesArray(node) {
  145. return node.type === 'ArrayExpression' && node.elements.every(elem => // Some of array items may be skipped because there is no module with such id
  146. !elem || isModuleWrapper(elem));
  147. }
  148. function isOptimizedModulesArray(node) {
  149. // Checking whether modules are contained in `Array(<minimum ID>).concat(...modules)` array:
  150. // https://github.com/webpack/webpack/blob/v1.14.0/lib/Template.js#L91
  151. // The `<minimum ID>` + array indexes are module ids
  152. return node.type === 'CallExpression' && node.callee.type === 'MemberExpression' && // Make sure the object called is `Array(<some number>)`
  153. node.callee.object.type === 'CallExpression' && node.callee.object.callee.type === 'Identifier' && node.callee.object.callee.name === 'Array' && node.callee.object.arguments.length === 1 && isNumericId(node.callee.object.arguments[0]) && // Make sure the property X called for `Array(<some number>).X` is `concat`
  154. node.callee.property.type === 'Identifier' && node.callee.property.name === 'concat' && // Make sure exactly one array is passed in to `concat`
  155. node.arguments.length === 1 && isModulesArray(node.arguments[0]);
  156. }
  157. function isModuleWrapper(node) {
  158. return (// It's an anonymous function expression that wraps module
  159. (node.type === 'FunctionExpression' || node.type === 'ArrowFunctionExpression') && !node.id || // If `DedupePlugin` is used it can be an ID of duplicated module...
  160. isModuleId(node) || // or an array of shape [<module_id>, ...args]
  161. node.type === 'ArrayExpression' && node.elements.length > 1 && isModuleId(node.elements[0])
  162. );
  163. }
  164. function isModuleId(node) {
  165. return node.type === 'Literal' && (isNumericId(node) || typeof node.value === 'string');
  166. }
  167. function isNumericId(node) {
  168. return node.type === 'Literal' && Number.isInteger(node.value) && node.value >= 0;
  169. }
  170. function isChunkIds(node) {
  171. // Array of numeric or string ids. Chunk IDs are strings when NamedChunksPlugin is used
  172. return node.type === 'ArrayExpression' && node.elements.every(isModuleId);
  173. }
  174. function isAsyncChunkPushExpression(node) {
  175. const {
  176. callee,
  177. arguments: args
  178. } = node;
  179. return callee.type === 'MemberExpression' && callee.property.name === 'push' && callee.object.type === 'AssignmentExpression' && args.length === 1 && args[0].type === 'ArrayExpression' && mayBeAsyncChunkArguments(args[0].elements) && isModulesList(args[0].elements[1]);
  180. }
  181. function mayBeAsyncChunkArguments(args) {
  182. return args.length >= 2 && isChunkIds(args[0]);
  183. }
  184. function isAsyncWebWorkerChunkExpression(node) {
  185. const {
  186. callee,
  187. type,
  188. arguments: args
  189. } = node;
  190. return type === 'CallExpression' && callee.type === 'MemberExpression' && args.length === 2 && isChunkIds(args[0]) && isModulesList(args[1]);
  191. }
  192. function getModulesLocations(node) {
  193. if (node.type === 'ObjectExpression') {
  194. // Modules hash
  195. const modulesNodes = node.properties;
  196. return modulesNodes.reduce((result, moduleNode) => {
  197. const moduleId = moduleNode.key.name || moduleNode.key.value;
  198. result[moduleId] = getModuleLocation(moduleNode.value);
  199. return result;
  200. }, {});
  201. }
  202. const isOptimizedArray = node.type === 'CallExpression';
  203. if (node.type === 'ArrayExpression' || isOptimizedArray) {
  204. // Modules array or optimized array
  205. const minId = isOptimizedArray ? // Get the [minId] value from the Array() call first argument literal value
  206. node.callee.object.arguments[0].value : // `0` for simple array
  207. 0;
  208. const modulesNodes = isOptimizedArray ? // The modules reside in the `concat()` function call arguments
  209. node.arguments[0].elements : node.elements;
  210. return modulesNodes.reduce((result, moduleNode, i) => {
  211. if (moduleNode) {
  212. result[i + minId] = getModuleLocation(moduleNode);
  213. }
  214. return result;
  215. }, {});
  216. }
  217. return {};
  218. }
  219. function getModuleLocation(node) {
  220. return {
  221. start: node.start,
  222. end: node.end
  223. };
  224. }