html.js 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228
  1. "use strict";
  /**
   * Class-inheritance helper emitted by the TypeScript compiler.
   *
   * Reuses an `__extends` already present on `this` when there is one;
   * otherwise builds a function that (1) links the static side of `d` to `b`
   * and (2) makes `d.prototype` inherit from `b.prototype`.
   */
  var __extends = (this && this.__extends) || (function () {
      // Last-resort strategy: copy the base's own enumerable properties.
      function copyOwnStatics(derived, base) {
          for (var key in base) {
              if (Object.prototype.hasOwnProperty.call(base, key)) {
                  derived[key] = base[key];
              }
          }
      }
      // Strategy for engines where `__proto__` is honoured.
      function assignProto(derived, base) {
          derived.__proto__ = base;
      }
      // Self-replacing function: the first call picks a strategy, stores it
      // in `linkStatics`, and every later call goes straight to that strategy.
      var linkStatics = function (derived, base) {
          if (Object.setPrototypeOf) {
              linkStatics = Object.setPrototypeOf;
          }
          else if ({ __proto__: [] } instanceof Array) {
              linkStatics = assignProto;
          }
          else {
              linkStatics = copyOwnStatics;
          }
          return linkStatics(derived, base);
      };
      return function (d, b) {
          // Surrogate constructor: lets us create an object inheriting from
          // b.prototype without running b itself.
          function Surrogate() {
              this.constructor = d;
          }
          var baseIsNull = b === null;
          if (!baseIsNull && typeof b !== "function") {
              throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
          }
          linkStatics(d, b);
          if (baseIsNull) {
              d.prototype = Object.create(b);
              return;
          }
          Surrogate.prototype = b.prototype;
          d.prototype = new Surrogate();
      };
  })();
  /**
   * Object-spread helper emitted by the TypeScript compiler.
   *
   * On first call it rebinds `__assign` to `Object.assign` when available, or
   * to a hand-written shallow copy of own enumerable properties otherwise,
   * then forwards the current call to whichever was chosen.
   */
  var __assign = (this && this.__assign) || function () {
      if (Object.assign) {
          __assign = Object.assign;
      }
      else {
          __assign = function (t) {
              var total = arguments.length;
              for (var idx = 1; idx < total; idx++) {
                  var source = arguments[idx];
                  for (var key in source) {
                      if (Object.prototype.hasOwnProperty.call(source, key)) {
                          t[key] = source[key];
                      }
                  }
              }
              return t;
          };
      }
      return __assign.apply(this, arguments);
  };
  /**
   * Array-spread helper emitted by the TypeScript compiler.
   *
   * Appends every indexed element of `from` to the end of `to` (in place) and
   * returns `to`. Both lengths are read once, before any element is copied.
   */
  var __spreadArray = (this && this.__spreadArray) || function (to, from) {
      var count = from.length;
      var offset = to.length;
      var k = 0;
      while (k < count) {
          to[offset + k] = from[k];
          k++;
      }
      return to;
  };
  /**
   * Default-import interop helper emitted by the TypeScript compiler.
   *
   * A module flagged with `__esModule` is returned untouched; anything else is
   * wrapped so that it can be read through a `default` property.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      return { "default": mod };
  };
  36. Object.defineProperty(exports, "__esModule", { value: true });
  37. exports.parse = exports.base_parse = void 0;
  38. var he_1 = __importDefault(require("he"));
  39. var css_select_1 = require("css-select");
  40. var node_1 = __importDefault(require("./node"));
  41. var type_1 = __importDefault(require("./type"));
  42. var text_1 = __importDefault(require("./text"));
  43. var matcher_1 = __importDefault(require("../matcher"));
  44. var back_1 = __importDefault(require("../back"));
  45. var comment_1 = __importDefault(require("./comment"));
  /**
   * Decode HTML entities in `val` using the `he` package.
   *
   * The decoded string is additionally round-tripped through
   * JSON.stringify / JSON.parse; the original comment labels this step
   * "clone string".
   * NOTE(review): presumably this forces a fresh string that does not keep a
   * larger source buffer alive - confirm before removing.
   *
   * @param {string} val text possibly containing HTML entities
   * @return {string} decoded copy of the text
   */
  function decode(val) {
      var decoded = he_1.default.decode(val);
      var serialized = JSON.stringify(decoded);
      return JSON.parse(serialized);
  }
  // Tag names treated as block-level when building structured text.
  // Source list: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
  // Each name is registered in lower case and then in upper case, in that order.
  var kBlockElements = new Set();
  [
      'address', 'article', 'aside', 'blockquote', 'br', 'details', 'dialog',
      'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer',
      'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
      'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'td', 'tr',
      'ul'
  ].forEach(function (name) {
      kBlockElements.add(name);
      kBlockElements.add(name.toUpperCase());
  });
  /**
   * Minimal token list backed by a Set (used for an element's class list).
   *
   * Every mutating method except a no-op `remove` invokes the `afterUpdate`
   * callback with the list itself, so the owner can react to the change.
   */
  var DOMTokenList = /** @class */ (function () {
      /**
       * @param {Iterable<string>} [valuesInit] initial tokens (defaults to none)
       * @param {Function} [afterUpdate] called with this list after each change
       */
      function DOMTokenList(valuesInit, afterUpdate) {
          this._set = new Set(valuesInit === void 0 ? [] : valuesInit);
          this._afterUpdate = afterUpdate === void 0 ? function () { return null; } : afterUpdate;
      }
      var proto = DOMTokenList.prototype;
      /** Throws when the token contains any whitespace character. */
      proto._validate = function (c) {
          if (!/\s/.test(c)) {
              return;
          }
          throw new Error("DOMException in DOMTokenList.add: The token '" + c + "' contains HTML space characters, which are not valid in tokens.");
      };
      /** Adds token `c` (after validation) and notifies the owner. */
      proto.add = function (c) {
          this._validate(c);
          this._set.add(c);
          this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
      };
      /** Drops `c1`, adds `c2` (only `c2` is validated) and notifies the owner. */
      proto.replace = function (c1, c2) {
          this._validate(c2);
          var tokens = this._set;
          tokens.delete(c1);
          tokens.add(c2);
          this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
      };
      /** Removes `c`; the owner is notified only if the token was present. */
      proto.remove = function (c) {
          if (this._set.delete(c)) {
              this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
          }
      };
      /** Removes `c` when present, adds it otherwise, then notifies the owner. */
      proto.toggle = function (c) {
          this._validate(c);
          var tokens = this._set;
          // Set#delete reports whether the token existed; add it when it did not.
          if (!tokens.delete(c)) {
              tokens.add(c);
          }
          this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
      };
      /** @return {boolean} whether `c` is currently in the list */
      proto.contains = function (c) {
          return this._set.has(c);
      };
      Object.defineProperty(proto, "length", {
          /** Number of tokens in the list. */
          get: function () {
              return this._set.size;
          },
          enumerable: false,
          configurable: true
      });
      /** @return {Iterator<string>} iterator over the tokens in insertion order */
      proto.values = function () {
          return this._set.values();
      };
      Object.defineProperty(proto, "value", {
          /** Tokens as a fresh array, in insertion order. */
          get: function () {
              return Array.from(this._set.values());
          },
          enumerable: false,
          configurable: true
      });
      /** @return {string} tokens joined by single spaces */
      proto.toString = function () {
          return Array.from(this._set.values()).join(' ');
      };
      return DOMTokenList;
  }());
  185. /**
  186. * HTMLElement, which contains a set of children.
  187. *
  188. * Note: this is a minimalist implementation, no complete tree
  189. * structure provided (no parentNode, nextSibling,
  190. * previousSibling etc).
  191. * @class HTMLElement
  192. * @extends {Node}
  193. */
  194. var HTMLElement = /** @class */ (function (_super) {
  195. __extends(HTMLElement, _super);
  /**
   * Creates an instance of HTMLElement.
   *
   * @param {string} tagName tag name as written in the source; stored as `rawTagName`
   * @param {Object} keyAttrs id and class attribute (`keyAttrs.id`, `keyAttrs.class`)
   * @param {string} [rawAttrs] attributes in string form; when empty it is
   *        synthesised from `keyAttrs`
   * @param {Node} parentNode forwarded to the super constructor
   *
   * @memberof HTMLElement
   */
  function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode) {
      if (rawAttrs === void 0) { rawAttrs = ''; }
      var _this = _super.call(this, parentNode) || this;
      _this.rawAttrs = rawAttrs || '';
      /**
       * Node Type declaration.
       */
      _this.nodeType = type_1.default.ELEMENT_NODE;
      _this.rawTagName = tagName;
      _this.id = keyAttrs.id || '';
      _this.childNodes = [];
      // Splitting on whitespace yields empty strings when the class value has
      // leading or trailing whitespace; those are not tokens and must not end
      // up in the class list.
      var classTokens = keyAttrs.class
          ? keyAttrs.class.split(/\s+/).filter(function (token) { return token !== ''; })
          : [];
      // Any change made through classList is written back to the class attribute.
      _this.classList = new DOMTokenList(classTokens, function (classList) { return (_this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
      ); });
      if (!rawAttrs) {
          // No attribute string supplied: rebuild one from the key attributes.
          var parts = [];
          if (keyAttrs.id) {
              parts.push("id=\"" + keyAttrs.id + "\"");
          }
          if (keyAttrs.class) {
              parts.push("class=\"" + _this.classList.toString() + "\"");
          }
          _this.rawAttrs = parts.join(' ');
      }
      return _this;
  }
  /**
   * Quote an attribute value for inclusion in the raw attribute string.
   *
   * The value is wrapped in double quotes and embedded double quotes are
   * replaced by `&quot;`. No other escaping is applied: backslashes, newlines
   * and tabs are legal inside a double-quoted HTML attribute value and are
   * emitted verbatim (JSON-style escaping would change the attribute's value).
   *
   * @param {?string} attr attribute value
   * @returns {string} quoted value, or the literal string "null" when the
   *          attribute has no value (null / undefined)
   */
  HTMLElement.prototype.quoteAttribute = function (attr) {
      if (attr === null || attr === undefined) {
          return "null";
      }
      return '"' + String(attr).replace(/"/g, '&quot;') + '"';
  };
  /**
   * Remove current element from its parent.
   *
   * The parent's `childNodes` is replaced by a copy without this element and
   * this element's `parentNode` is cleared, so the detached node no longer
   * claims a parent it is not a child of. Does nothing when there is no parent.
   */
  HTMLElement.prototype.remove = function () {
      var self = this;
      var parent = this.parentNode;
      if (!parent) {
          return;
      }
      parent.childNodes = parent.childNodes.filter(function (child) {
          return child !== self;
      });
      this.parentNode = null;
  };
  /**
   * Remove a child from this element's childNodes.
   *
   * `childNodes` is replaced by a new array that omits every entry identical
   * to `node`; the previous array is left untouched.
   * @param {HTMLElement} node node to remove
   */
  HTMLElement.prototype.removeChild = function (node) {
      var remaining = [];
      this.childNodes.forEach(function (child) {
          if (child !== node) {
              remaining.push(child);
          }
      });
      this.childNodes = remaining;
  };
  /**
   * Swap a child for another node.
   *
   * `childNodes` is replaced by a new array in which every entry identical to
   * `oldNode` is substituted with `newNode`; all other entries are kept.
   * @param {HTMLElement} oldNode node to exchange
   * @param {HTMLElement} newNode new node
   */
  HTMLElement.prototype.exchangeChild = function (oldNode, newNode) {
      this.childNodes = this.childNodes.map(function (child) {
          return child === oldNode ? newNode : child;
      });
  };
  Object.defineProperty(HTMLElement.prototype, "tagName", {
      /**
       * Upper-cased tag name.
       * A falsy `rawTagName` (null, undefined, '') is returned unchanged.
       * @return {string} upper-cased tag name, or the falsy raw value
       */
      get: function () {
          var raw = this.rawTagName;
          if (!raw) {
              return raw;
          }
          return raw.toUpperCase();
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "localName", {
      /**
       * Lower-cased tag name.
       * Mirrors the `tagName` getter: a falsy `rawTagName` (null, undefined,
       * '') is returned unchanged instead of throwing on `.toLowerCase()`.
       * @return {string} lower-cased tag name, or the falsy raw value
       */
      get: function () {
          var raw = this.rawTagName;
          return raw ? raw.toLowerCase() : raw;
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "rawText", {
      /**
       * Get escaped (as-is) text value of current node and its children.
       * Built by concatenating the `rawText` of every child in order.
       * @return {string} text content
       */
      get: function () {
          var text = '';
          this.childNodes.forEach(function (child) {
              text += child.rawText;
          });
          return text;
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "textContent", {
      /**
       * Reading returns `rawText` (the escaped text of this element).
       * @return {string} text content
       */
      get: function () {
          var raw = this.rawText;
          return raw;
      },
      /**
       * Writing replaces all children with a single text node holding `val`,
       * created with this element as its parent.
       * @param {string} val new text
       */
      set: function (val) {
          this.childNodes = [new text_1.default(val, this)];
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "text", {
      /**
       * Get unescaped text value of current node and its children:
       * `rawText` passed through this module's `decode` helper.
       * @return {string} text content
       */
      get: function () {
          var escaped = this.rawText;
          return decode(escaped);
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "structuredText", {
      /**
       * Get structured Text (with '\n' etc.)
       *
       * Walks the subtree depth-first and groups text into blocks: entering or
       * leaving a block-level element starts a new block whenever the current
       * one already has content. Whitespace-only text nodes are not emitted;
       * they only cause a single space to be put in front of the next text in
       * the same block. Blocks are joined with '\n', runs of whitespace inside
       * a block collapse to one space, and trailing whitespace is removed.
       *
       * Block detection is case-insensitive, so `<Div>` is treated like `<div>`.
       * @return {string} structured text
       */
      get: function () {
          var blocks = [[]];
          // True when a whitespace-only node was seen since the last text of
          // the current block; reset whenever a new block is started.
          var pendingSpace = false;
          function isBlockElement(node) {
              var name = node.rawTagName;
              // The set holds every name in lower case, so normalising the
              // tag name covers any capitalisation.
              return Boolean(name) && kBlockElements.has(String(name).toLowerCase());
          }
          function breakBlock() {
              if (blocks[blocks.length - 1].length > 0) {
                  blocks.push([]);
                  pendingSpace = false;
              }
          }
          function visit(node) {
              if (node.nodeType === type_1.default.ELEMENT_NODE) {
                  var isBlock = isBlockElement(node);
                  if (isBlock) {
                      breakBlock();
                  }
                  node.childNodes.forEach(function (child) { visit(child); });
                  if (isBlock) {
                      breakBlock();
                  }
              }
              else if (node.nodeType === type_1.default.TEXT_NODE) {
                  if (node.isWhitespace) {
                      // Whitespace node, postponed output
                      pendingSpace = true;
                      return;
                  }
                  var text = node.trimmedText;
                  if (pendingSpace) {
                      text = " " + text;
                      pendingSpace = false;
                  }
                  blocks[blocks.length - 1].push(text);
              }
          }
          visit(this);
          return blocks.map(function (block) {
              // Normalize each line's whitespace
              return block.join('').replace(/\s{2,}/g, ' ');
          })
              .join('\n').replace(/\s+$/, ''); // trimRight;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Serialise this element to HTML.
   *
   * Without a tag name only the inner HTML is returned. Void tags (matched
   * case-insensitively) are emitted as a lone opening tag; every other tag is
   * emitted as opening tag, inner HTML and closing tag. The tag name and the
   * raw attribute string are written exactly as stored.
   * @return {string} HTML markup
   */
  HTMLElement.prototype.toString = function () {
      var tag = this.rawTagName;
      if (!tag) {
          return this.innerHTML;
      }
      var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
      var opening = "<" + tag + attrs + ">";
      if (/^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag)) {
          return opening;
      }
      return opening + this.innerHTML + "</" + tag + ">";
  };
  Object.defineProperty(HTMLElement.prototype, "innerHTML", {
      /**
       * Markup of all children: each child's `toString()` concatenated in order.
       * @return {string} inner HTML
       */
      get: function () {
          return this.childNodes.map(function (child) {
              return child.toString();
          }).join('');
      },
      /**
       * Replace the children with the result of parsing `content` using this
       * module's `parse` with its default options. When parsing yields no
       * nodes, a single text node holding `content` is used instead.
       *
       * The adopted nodes get this element as `parentNode`; otherwise they
       * would keep pointing at the temporary root produced by `parse`.
       * @param {string} content HTML markup
       */
      set: function (content) {
          //const r = parse(content, global.options); // TODO global.options ?
          var self = this;
          var parsed = parse(content);
          var nodes = parsed.childNodes.length ? parsed.childNodes : [new text_1.default(content, this)];
          nodes.forEach(function (node) {
              node.parentNode = self;
          });
          this.childNodes = nodes;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Replace this element's children.
   *
   * - A single node becomes the only child.
   * - A string is parsed with this module's `parse` (using `options`); when
   *   parsing yields no nodes, a single text node holding the string is used.
   * - Anything else is stored as `childNodes` as given (an array of nodes is
   *   the expected case).
   *
   * Nodes adopted from an array or from parsing get this element as
   * `parentNode`; parsed nodes would otherwise keep pointing at the temporary
   * root produced by `parse`.
   * @param {Node|Node[]|string} content new content
   * @param {Object} [options] options forwarded to `parse` for string content
   */
  HTMLElement.prototype.set_content = function (content, options) {
      if (options === void 0) { options = {}; }
      var self = this;
      var nodes = content;
      if (content instanceof node_1.default) {
          nodes = [content];
      }
      else if (typeof content === 'string') {
          var parsed = parse(content, options);
          nodes = parsed.childNodes.length ? parsed.childNodes : [new text_1.default(content, this)];
      }
      if (Array.isArray(nodes)) {
          nodes.forEach(function (node) {
              node.parentNode = self;
          });
      }
      this.childNodes = nodes;
  };
  /**
   * Replace this element, inside its parent, with the given nodes/strings.
   *
   * Each argument may be a node (used as is) or a string (parsed with this
   * module's `parse`; when parsing yields no nodes, a text node holding the
   * string is used). Arguments of any other type are ignored.
   *
   * The replacement nodes get the parent as `parentNode` and this element is
   * detached (`parentNode` set to null). Nothing happens when this element
   * has no parent or is not actually listed among its parent's children.
   * @param {...(Node|string)} nodes replacement content
   */
  HTMLElement.prototype.replaceWith = function () {
      var parent = this.parentNode;
      if (!parent) {
          return;
      }
      var index = parent.childNodes.indexOf(this);
      if (index === -1) {
          // Stale parent pointer: there is no position to splice into.
          return;
      }
      var replacement = [];
      for (var i = 0; i < arguments.length; i++) {
          var item = arguments[i];
          if (item instanceof node_1.default) {
              replacement.push(item);
          }
          else if (typeof item === 'string') {
              // const r = parse(content, global.options); // TODO global.options ?
              var parsed = parse(item);
              if (parsed.childNodes.length) {
                  Array.prototype.push.apply(replacement, parsed.childNodes);
              }
              else {
                  replacement.push(new text_1.default(item, parent));
              }
          }
      }
      // Detach first so that passing this element as its own replacement
      // still leaves it attached afterwards.
      this.parentNode = null;
      replacement.forEach(function (node) {
          node.parentNode = parent;
      });
      parent.childNodes = parent.childNodes.slice(0, index).concat(replacement, parent.childNodes.slice(index + 1));
  };
  Object.defineProperty(HTMLElement.prototype, "outerHTML", {
      /**
       * Markup of this element including its own tag; identical to `toString()`.
       * @return {string} outer HTML
       */
      get: function () {
          var markup = this.toString();
          return markup;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Trim element from right (in block) after seeing pattern in a TextNode.
   *
   * Children are scanned in order. Element children are trimmed recursively.
   * For any other child, if `pattern` matches its `rawText`, the text is cut
   * at the match position and all following siblings are dropped by
   * truncating `childNodes` in place.
   * @param {RegExp} pattern pattern to find
   * @return {HTMLElement} reference to current node
   */
  HTMLElement.prototype.trimRight = function (pattern) {
      var position = 0;
      while (position < this.childNodes.length) {
          var child = this.childNodes[position];
          if (child.nodeType === type_1.default.ELEMENT_NODE) {
              child.trimRight(pattern);
          }
          else {
              var cut = child.rawText.search(pattern);
              if (cut > -1) {
                  child.rawText = child.rawText.slice(0, cut);
                  // trim all following nodes.
                  this.childNodes.length = position + 1;
              }
          }
          position++;
      }
      return this;
  };
  Object.defineProperty(HTMLElement.prototype, "structure", {
      /**
       * Get DOM structure
       *
       * One line per element, written as `tag#id.class1.class2` and indented
       * by one space per nesting level. Non-whitespace text nodes appear as
       * `#text`; other node types are not listed.
       * @return {string} structure
       */
      get: function () {
          var lines = [];
          function emit(depth, label) {
              lines.push(' '.repeat(depth) + label);
          }
          function walk(node, depth) {
              var idPart = node.id ? ("#" + node.id) : '';
              var classPart = node.classList.length ? ("." + node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
              emit(depth, "" + node.rawTagName + idPart + classPart);
              node.childNodes.forEach(function (child) {
                  if (child.nodeType === type_1.default.ELEMENT_NODE) {
                      walk(child, depth + 1);
                  }
                  else if (child.nodeType === type_1.default.TEXT_NODE && !child.isWhitespace) {
                      emit(depth + 1, '#text');
                  }
              });
          }
          walk(this, 0);
          return lines.join('\n');
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Remove whitespaces in this sub tree.
   *
   * Whitespace-only text nodes are dropped, the remaining text nodes have
   * their `rawText` replaced by `trimmedText`, and element children are
   * processed recursively. Nodes of any other type are kept untouched. The
   * existing `childNodes` array is compacted in place.
   * @return {HTMLElement} pointer to this
   */
  HTMLElement.prototype.removeWhitespace = function () {
      var nodes = this.childNodes;
      var kept = 0;
      nodes.forEach(function (node) {
          switch (node.nodeType) {
              case type_1.default.TEXT_NODE:
                  if (node.isWhitespace) {
                      return;
                  }
                  node.rawText = node.trimmedText;
                  break;
              case type_1.default.ELEMENT_NODE:
                  node.removeWhitespace();
                  break;
          }
          // Survivors are written back over the already-visited front part.
          nodes[kept++] = node;
      });
      nodes.length = kept;
      return this;
  };
  /**
   * Query CSS selector to find matching nodes.
   *
   * Delegates to css-select's `selectAll`, searching from this element with
   * `xmlMode` enabled and this module's matcher as the adapter.
   * @param {string} selector CSS selector
   * @return {HTMLElement[]} matching elements
   */
  HTMLElement.prototype.querySelectorAll = function (selector) {
      var options = {
          xmlMode: true,
          adapter: matcher_1.default
      };
      return css_select_1.selectAll(selector, this, options);
  };
  /**
   * Query CSS Selector to find matching node.
   *
   * Delegates to css-select's `selectOne`, searching from this element with
   * `xmlMode` enabled and this module's matcher as the adapter.
   * @param {string} selector CSS selector
   * @return {HTMLElement} matching node
   */
  HTMLElement.prototype.querySelector = function (selector) {
      var options = {
          xmlMode: true,
          adapter: matcher_1.default
      };
      return css_select_1.selectOne(selector, this, options);
  };
  /**
   * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
   *
   * Implementation: the chain from this element up to the root is recorded
   * first (each node mapped to the child through which this element is
   * reached; this element itself maps to null). css-select is then run once
   * per node of that chain, bottom-up, with an adapter whose tree view is
   * restricted to the recorded chain.
   * @param selector a DOMString containing a selector list
   */
  HTMLElement.prototype.closest = function (selector) {
      // node on the ancestor chain -> next node on the way down to `this`
      var pathChild = new Map();
      var below = null;
      for (var up = this; up; up = up.parentNode) {
          pathChild.set(up, below);
          below = up;
      }
      // Depth-first search limited to the recorded chain.
      function findOne(test, elems) {
          for (var i = 0, count = elems.length; i < count; i++) {
              var candidate = elems[i];
              if (test(candidate)) {
                  return candidate;
              }
              var next = pathChild.get(candidate);
              if (next) {
                  var hit = findOne(test, [next]);
                  if (hit) {
                      return hit;
                  }
              }
          }
          return null;
      }
      // A fresh adapter per query: the module matcher with the tree-walking
      // hooks overridden so that only the recorded chain is visible.
      function makeAdapter() {
          return __assign(__assign({}, matcher_1.default), { getChildren: function (node) {
                  var child = pathChild.get(node);
                  return child && [child];
              },
              getSiblings: function (node) {
                  return [node];
              },
              findOne: findOne,
              findAll: function () {
                  return [];
              } });
      }
      for (var el = this; el; el = el.parentNode) {
          var found = css_select_1.selectOne(selector, el, {
              xmlMode: true,
              adapter: makeAdapter()
          });
          if (found) {
              return found;
          }
      }
      return null;
  };
  /**
   * Append a child node to childNodes
   *
   * The node is pushed onto the end of this element's `childNodes` and its
   * `parentNode` is then set to this element.
   * @param {Node} node node to append
   * @return {Node} node appended
   */
  HTMLElement.prototype.appendChild = function (node) {
      var children = this.childNodes;
      children.push(node);
      node.parentNode = this;
      return node;
  };
  Object.defineProperty(HTMLElement.prototype, "firstChild", {
      /**
       * Get first child node
       * @return {Node} first child node (undefined when there are no children)
       */
      get: function () {
          var children = this.childNodes;
          return children[0];
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "lastChild", {
      /**
       * Get last child node
       * Obtained by passing `childNodes` to the project's `back` helper.
       * @return {Node} last child node
       */
      get: function () {
          var children = this.childNodes;
          return back_1.default(children);
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "attrs", {
      /**
       * Get attributes
       *
       * Built from `rawAttributes` on first access and cached in `_attrs`:
       * names are lower-cased, values are entity-decoded, and attributes
       * without a value map to the empty string.
       * @access private
       * @return {Object} parsed and unescaped attributes
       */
      get: function () {
          var cache = this._attrs;
          if (!cache) {
              cache = this._attrs = {};
              var raw = this.rawAttributes;
              for (var name in raw) {
                  cache[name.toLowerCase()] = decode(raw[name] || '');
              }
          }
          return cache;
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "attributes", {
      /**
       * Entity-decoded attributes under their original (as-written) names.
       * A new object is built from `rawAttributes` on every access;
       * attributes without a value map to the empty string.
       * @return {Object} decoded attributes
       */
      get: function () {
          var raw = this.rawAttributes;
          var decodedAttrs = {};
          for (var name in raw) {
              decodedAttrs[name] = decode(raw[name] || '');
          }
          return decodedAttrs;
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
      /**
       * Get escaped (as-is) attributes
       *
       * Parsed from the `rawAttrs` string on first access and cached in
       * `_rawAttrs`. Values may be double-quoted, single-quoted or unquoted;
       * an attribute without a value, or with an empty value, maps to null.
       * @return {Object} parsed attributes
       */
      get: function () {
          if (!this._rawAttrs) {
              var parsed = {};
              if (this.rawAttrs) {
                  var re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
                  // Groups: 1 = name, 2 = "double", 3 = 'single', 4 = unquoted.
                  for (var m = re.exec(this.rawAttrs); m; m = re.exec(this.rawAttrs)) {
                      parsed[m[1]] = m[2] || m[3] || m[4] || null;
                  }
              }
              this._rawAttrs = parsed;
          }
          return this._rawAttrs;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Remove an attribute from the HTMLElement.
   *
   * The name is matched case-insensitively, like in `setAttribute`: every raw
   * attribute whose lower-cased name equals the lower-cased `key` is removed,
   * and the matching entry of the decoded `_attrs` cache (which is keyed by
   * lower-cased names) is dropped as well. The raw attribute string is then
   * rebuilt the same way `setAttribute` builds it, and `id` is cleared when
   * the id attribute was removed.
   * @param {string} key The attribute name
   */
  HTMLElement.prototype.removeAttribute = function (key) {
      var _this = this;
      var lowered = key.toLowerCase();
      var attrs = this.rawAttributes;
      Object.keys(attrs).forEach(function (name) {
          if (name.toLowerCase() === lowered) {
              delete attrs[name];
          }
      });
      // Update this.attribute
      if (this._attrs) {
          delete this._attrs[lowered];
      }
      // Update rawString
      this.rawAttrs = Object.keys(attrs).map(function (name) {
          var raw = attrs[name];
          // Value-less and empty attributes are written as a bare name.
          if (raw === null || raw === undefined || raw === '') {
              return name;
          }
          return name + "=" + _this.quoteAttribute(raw);
      }).join(' ');
      // Update this.id
      if (lowered === 'id') {
          this.id = '';
      }
  };
  /**
   * Tell whether the element has the given attribute.
   *
   * The name is compared case-insensitively. Only the element's own parsed
   * attributes count: names that merely exist on Object.prototype (such as
   * "constructor" or "toString") are not reported as attributes.
   * @param {string} key The attribute name
   * @return {boolean} true when the attribute is present
   */
  HTMLElement.prototype.hasAttribute = function (key) {
      return Object.prototype.hasOwnProperty.call(this.attrs, key.toLowerCase());
  };
  /**
   * Get an attribute
   *
   * The name is compared case-insensitively. Only the element's own parsed
   * attributes are consulted, so names that merely exist on Object.prototype
   * (such as "constructor" or "toString") yield undefined.
   * @param {string} key The attribute name
   * @return {string} value of the attribute, or undefined when it is absent
   */
  HTMLElement.prototype.getAttribute = function (key) {
      var attrs = this.attrs;
      var name = key.toLowerCase();
      return Object.prototype.hasOwnProperty.call(attrs, name) ? attrs[name] : undefined;
  };
  /**
   * Set an attribute value to the HTMLElement
   *
   * The name is matched case-insensitively against the existing raw
   * attributes; on a match the existing spelling of the name is kept. The
   * value is always converted with `String(value)`, so null / undefined are
   * stored as the strings "null" / "undefined" - they do NOT remove the
   * attribute (use `removeAttribute` for that). An empty value is written to
   * the raw attribute string as a bare name.
   *
   * The decoded `_attrs` cache (when already built), the raw attribute string
   * and - for the id attribute in any spelling - `this.id` are kept in sync.
   * @param {string} key The attribute name
   * @param {string} value The value to set (stringified)
   * @throws {Error} when called with fewer than two arguments
   */
  HTMLElement.prototype.setAttribute = function (key, value) {
      var _this = this;
      if (arguments.length < 2) {
          throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
      }
      var k2 = key.toLowerCase();
      var attrs = this.rawAttributes;
      // Reuse the spelling of an attribute that already exists.
      for (var k in attrs) {
          if (k.toLowerCase() === k2) {
              key = k;
              break;
          }
      }
      attrs[key] = String(value);
      // update this.attrs
      if (this._attrs) {
          this._attrs[k2] = decode(attrs[key]);
      }
      // Update rawString
      this.rawAttrs = Object.keys(attrs).map(function (name) {
          var val = _this.quoteAttribute(attrs[name]);
          if (val === 'null' || val === '""') {
              return name;
          }
          return name + "=" + val;
      }).join(' ');
      // Update this.id. Compare the lower-cased name: `key` may have been
      // rewritten above to an existing spelling such as "ID".
      if (k2 === 'id') {
          this.id = attrs[key];
      }
  };
  /**
   * Replace all the attributes of the HTMLElement by the provided attributes
   * The cached attribute maps are dropped and the raw attribute string is
   * rebuilt from `attributes`.
   * @param {Attributes} attributes the new attribute set; a value of
   * null / undefined (or the strings 'null' / '""') yields a bare,
   * value-less attribute name
   */
  HTMLElement.prototype.setAttributes = function (attributes) {
      var _this = this;
      // Invalidate current this.attributes
      if (this._attrs) {
          delete this._attrs;
      }
      // Invalidate current this.rawAttributes
      if (this._rawAttrs) {
          delete this._rawAttrs;
      }
      // Update rawString
      this.rawAttrs = Object.keys(attributes).map(function (name) {
          var val = attributes[name];
          // Without the null / undefined check String(val) would emit name="null".
          if (val === null || val === undefined || val === 'null' || val === '""') {
              return name;
          }
          return name + "=" + _this.quoteAttribute(String(val));
      }).join(' ');
  };
  /**
   * Parse `html` and insert the resulting nodes relative to this element.
   * Every inserted node (elements and non-elements alike) gets its
   * parentNode pointed at the node that now holds it.
   * @param {string} where one of 'beforebegin', 'afterbegin', 'beforeend', 'afterend'
   * @param {string} html markup to parse and insert
   * @throws {Error} when fewer than two arguments are given or `where` is invalid
   */
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
      var _this = this;
      if (arguments.length < 2) {
          throw new Error('2 arguments required');
      }
      // Reject an unknown position before doing any parsing work.
      if (where !== 'beforebegin' && where !== 'afterbegin' && where !== 'beforeend' && where !== 'afterend') {
          throw new Error("The value provided ('" + where + "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'");
      }
      // Work on a copy so re-parenting cannot disturb the iteration.
      var nodes = parse(html).childNodes.slice();
      if (where === 'beforeend') {
          nodes.forEach(function (n) {
              _this.appendChild(n);
          });
          return;
      }
      var host;
      if (where === 'afterbegin') {
          host = this;
          host.childNodes.unshift.apply(host.childNodes, nodes);
      }
      else {
          // 'afterend' / 'beforebegin': splice into the parent's child list.
          host = this.parentNode;
          var idx = host.childNodes.indexOf(this);
          var at = where === 'afterend' ? idx + 1 : idx;
          host.childNodes.splice.apply(host.childNodes, [at, 0].concat(nodes));
      }
      nodes.forEach(function (n) {
          n.parentNode = host;
      });
  };
  /**
   * The node that directly follows this element in its parent's childNodes.
   * Yields null when there is no parent, when the element is the last child,
   * or when it is not listed among the parent's children.
   */
  Object.defineProperty(HTMLElement.prototype, "nextSibling", {
      get: function () {
          // A detached element has no siblings; report null like every other miss.
          if (!this.parentNode) {
              return null;
          }
          var children = this.parentNode.childNodes;
          var idx = children.indexOf(this);
          if (idx === -1) {
              return null;
          }
          return children[idx + 1] || null;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * The first HTMLElement that follows this element in its parent's
   * childNodes, skipping any non-element nodes in between.
   * Yields null when there is no parent or no such element.
   */
  Object.defineProperty(HTMLElement.prototype, "nextElementSibling", {
      get: function () {
          // A detached element has no siblings; report null like every other miss.
          if (!this.parentNode) {
              return null;
          }
          var children = this.parentNode.childNodes;
          var idx = children.indexOf(this);
          if (idx === -1) {
              return null;
          }
          for (var i = idx + 1; i < children.length; i++) {
              if (children[i] instanceof HTMLElement) {
                  return children[i];
              }
          }
          return null;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Read-only accessor: the string form of this element's classList.
   */
  Object.defineProperty(HTMLElement.prototype, "classNames", {
      configurable: true,
      enumerable: false,
      get: function () {
          var classes = this.classList;
          return classes.toString();
      }
  });
  962. return HTMLElement;
  963. }(node_1.default));
  964. exports.default = HTMLElement;
  // Tag names follow the custom-element naming rules, see
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
  //
  // kMarkupPattern matches either a whole comment (<!-- ... -->) or one tag.
  // For a tag the groups are:
  //   1: "/" when it is a closing tag     2: the tag name
  //   3: the raw attribute text           4: "/" when the tag ends with "/>"
  // Both patterns are global (stateful lastIndex) and case-insensitive.
  var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
  // kAttributePattern picks only the id and class attributes out of raw
  // attribute text. Groups: 2: attribute name, 4: double-quoted value,
  // 5: single-quoted value, 6: unquoted value.
  var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
  /**
   * Void elements: tags that never have content or a closing tag.
   * Keys are the tag names in all-lower and all-upper case.
   * The table has no prototype, so a lookup with an arbitrary tag name
   * (e.g. "constructor", "toString") cannot hit an inherited
   * Object.prototype member and be mistaken for a void element.
   */
  var kSelfClosingElements = (function () {
      var names = [
          'area', 'base', 'br', 'col', 'hr', 'img', 'input',
          'link', 'meta', 'source', 'embed', 'param', 'track', 'wbr'
      ];
      var table = Object.create(null);
      names.forEach(function (name) {
          table[name] = true;
          table[name.toUpperCase()] = true;
      });
      return table;
  }());
  /**
   * kElementsClosedByOpening[open][next] is true when an open <open>
   * element is implicitly closed by the opening tag <next>.
   * Tag names are listed in all-lower and all-upper case.
   * Outer and inner tables have no prototype, so lookups with arbitrary
   * tag names (e.g. "constructor") never resolve to inherited
   * Object.prototype members.
   */
  var kElementsClosedByOpening = (function () {
      var rules = {
          li: ['li'],
          p: ['p', 'div'],
          b: ['div'],
          td: ['td', 'th'],
          th: ['td', 'th'],
          h1: ['h1'],
          h2: ['h2'],
          h3: ['h3'],
          h4: ['h4'],
          h5: ['h5'],
          h6: ['h6']
      };
      // Build a prototype-less set holding each name in lower and upper case.
      function both_cases(names) {
          var set = Object.create(null);
          names.forEach(function (name) {
              set[name] = true;
              set[name.toUpperCase()] = true;
          });
          return set;
      }
      var table = Object.create(null);
      Object.keys(rules).forEach(function (tag) {
          table[tag] = both_cases(rules[tag]);
          table[tag.toUpperCase()] = both_cases(rules[tag]);
      });
      return table;
  }());
  /**
   * kElementsClosedByClosing[open][closing] is true when an open <open>
   * element is implicitly closed by the closing tag </closing>.
   * Tag names are listed in all-lower and all-upper case.
   * Outer and inner tables have no prototype, so lookups with arbitrary
   * tag names (e.g. "constructor") never resolve to inherited
   * Object.prototype members.
   */
  var kElementsClosedByClosing = (function () {
      var rules = {
          li: ['ul', 'ol'],
          a: ['div'],
          b: ['div'],
          i: ['div'],
          p: ['div'],
          td: ['tr', 'table'],
          th: ['tr', 'table']
      };
      // Build a prototype-less set holding each name in lower and upper case.
      function both_cases(names) {
          var set = Object.create(null);
          names.forEach(function (name) {
              set[name] = true;
              set[name.toUpperCase()] = true;
          });
          return set;
      }
      var table = Object.create(null);
      Object.keys(rules).forEach(function (tag) {
          table[tag] = both_cases(rules[tag]);
          table[tag.toUpperCase()] = both_cases(rules[tag]);
      });
      return table;
  }());
  // Name of the synthetic element the input is wrapped in before tokenizing.
  var frameflag = 'documentfragmentcontainer';
  /**
   * Parse a chunk of HTML source into a tree of nodes.
   * @param {string} data html
   * @param {Object} [options] parse options
   * @param {boolean} [options.lowerCaseTagName] lower-case every tag name
   * @param {boolean} [options.comment] keep comments as comment nodes
   * @param {Object} [options.blockTextElements] map of tag name to flag;
   * the content of these tags is not parsed as markup, and it is kept as
   * one text node only when the flag is truthy (default: script, noscript,
   * style and pre, all kept)
   * @return {HTMLElement[]} the stack of still-open elements: index 0 is
   * the root, any further entry is an element that was never closed
   */
  function base_parse(data, options) {
      if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
      var elements = options.blockTextElements || {
          script: true,
          noscript: true,
          style: true,
          pre: true
      };
      var element_names = Object.keys(elements);
      // Match the whole tag name only; an unanchored pattern would treat
      // <prefix> as <pre> or <stylesheet> as <style>.
      function whole_name_pattern(name) {
          return new RegExp('^(?:' + name + ')$', 'i');
      }
      var kBlockTextElements = element_names.map(function (it) {
          return whole_name_pattern(it);
      });
      var kIgnoreElements = element_names.filter(function (it) {
          return elements[it];
      }).map(function (it) {
          return whole_name_pattern(it);
      });
      function element_should_be_ignore(tag) {
          return kIgnoreElements.some(function (it) {
              return it.test(tag);
          });
      }
      function is_block_text_element(tag) {
          return kBlockTextElements.some(function (it) {
              return it.test(tag);
          });
      }
      // Position of the next "</tag>" at or after `from`, matched
      // case-insensitively on the original text (no lower-cased copy of the
      // whole document, so indices always refer to `data`); -1 if absent.
      function find_close_markup(tag, from) {
          var escaped = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
          var pattern = new RegExp('</' + escaped + '>', 'ig');
          pattern.lastIndex = from;
          var found = pattern.exec(data);
          return found ? found.index : -1;
      }
      var root = new HTMLElement(null, {}, '', null);
      var currentParent = root;
      var stack = [root];
      var lastTextPos = -1;
      var match;
      // https://github.com/taoqf/node-html-parser/issues/38
      data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
      // Where the caller's text ends, i.e. where the synthetic closing tag starts.
      var dataEndPos = data.length - ("</" + frameflag + ">").length;
      // The pattern is shared and stateful: always start from the beginning.
      kMarkupPattern.lastIndex = 0;
      while ((match = kMarkupPattern.exec(data))) {
          var markup = match[0];
          if (lastTextPos > -1 && lastTextPos + markup.length < kMarkupPattern.lastIndex) {
              // Text sits between the previous markup and this one.
              var between = data.substring(lastTextPos, kMarkupPattern.lastIndex - markup.length);
              currentParent.appendChild(new text_1.default(between, currentParent));
          }
          lastTextPos = kMarkupPattern.lastIndex;
          if (match[2] === frameflag) {
              // ignore container tag we add above
              continue;
          }
          if (markup[1] === '!') {
              // this is a comment
              if (options.comment) {
                  // Only keep what is in between <!-- and -->
                  // (substring swaps its arguments when start > end).
                  var commentText = data.substring(lastTextPos - 3, lastTextPos - markup.length + 4);
                  currentParent.appendChild(new comment_1.default(commentText, currentParent));
              }
              continue;
          }
          if (options.lowerCaseTagName) {
              match[2] = match[2].toLowerCase();
          }
          if (!match[1]) {
              // not </ tags
              var attrs = {};
              for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
                  attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
              }
              var openTagName = currentParent.rawTagName;
              if (!match[4] && kElementsClosedByOpening[openTagName]) {
                  if (kElementsClosedByOpening[openTagName][match[2]]) {
                      // The new tag implicitly closes the current element.
                      stack.pop();
                      currentParent = back_1.default(stack);
                  }
              }
              currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3], null));
              stack.push(currentParent);
              if (is_block_text_element(match[2])) {
                  // Skip ahead to the matching </script>, </style>, ...
                  var closeMarkup = "</" + match[2] + ">";
                  var contentStart = kMarkupPattern.lastIndex;
                  var index = find_close_markup(match[2], contentStart);
                  if (element_should_be_ignore(match[2])) {
                      var text = void 0;
                      if (index === -1) {
                          // No matching end tag: take the rest of the caller's
                          // text, but not the synthetic wrapper end tag.
                          text = data.substring(contentStart, Math.max(contentStart, dataEndPos));
                      }
                      else {
                          text = data.substring(contentStart, index);
                      }
                      if (text.length > 0) {
                          currentParent.appendChild(new text_1.default(text, currentParent));
                      }
                  }
                  if (index === -1) {
                      // Nothing left to tokenize; the element stays on the stack.
                      lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
                  }
                  else {
                      lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length;
                      // Pretend we just read the closing tag so it is popped below.
                      match[1] = 'true';
                  }
              }
          }
          if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
              // </ or /> or <br> etc.
              while (true) {
                  if (currentParent.rawTagName === match[2]) {
                      stack.pop();
                      currentParent = back_1.default(stack);
                      break;
                  }
                  var closedTagName = currentParent.tagName;
                  // Trying to close current tag, and move on
                  if (kElementsClosedByClosing[closedTagName] && kElementsClosedByClosing[closedTagName][match[2]]) {
                      stack.pop();
                      currentParent = back_1.default(stack);
                      continue;
                  }
                  // Use aggressive strategy to handle unmatching markups.
                  break;
              }
          }
      }
      return stack;
  }
  1184. exports.base_parse = base_parse;
  /**
   * Parse a chunk of HTML source and return its root element.
   * Elements that base_parse leaves open (never closed in the source) are
   * unwrapped afterwards: each one is removed and its children are handed
   * to a surviving ancestor.
   * @param {string} data html
   * @param {Object} [options] parse options, passed through to base_parse
   * @return {HTMLElement} root element
   */
  function parse(data, options) {
      if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
      var stack = base_parse(data, options);
      var root = stack[0];
      // Everything above the root on the stack is an element left open.
      while (stack.length > 1) {
          var unclosed = stack.pop();
          var enclosing = back_1.default(stack);
          var hasGrandParent = unclosed.parentNode && unclosed.parentNode.parentNode;
          if (!hasGrandParent) {
              // If it's final element just skip.
              continue;
          }
          var isPairError = unclosed.parentNode === enclosing && unclosed.tagName === enclosing.tagName;
          enclosing.removeChild(unclosed);
          // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
          //   -> children move next to the outer element.
          // Single error <div> <h3> </div> handle: Just removes <h3>
          //   -> children move into the enclosing element.
          var receiver = isPairError ? enclosing.parentNode : enclosing;
          unclosed.childNodes.forEach(function (child) {
              receiver.appendChild(child);
          });
          if (isPairError) {
              // The outer twin is handled as well; drop it from the stack.
              stack.pop();
          }
      }
      return root;
  }
  1228. exports.parse = parse;