import he from 'he'; import { selectAll, selectOne } from 'css-select'; import Node from './node'; import NodeType from './type'; import TextNode from './text'; import Matcher from '../matcher'; import arr_back from '../back'; import CommentNode from './comment'; // const { decode } = he; function decode(val) { // clone string return JSON.parse(JSON.stringify(he.decode(val))); } // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements const kBlockElements = new Set(); kBlockElements.add('address'); kBlockElements.add('ADDRESS'); kBlockElements.add('article'); kBlockElements.add('ARTICLE'); kBlockElements.add('aside'); kBlockElements.add('ASIDE'); kBlockElements.add('blockquote'); kBlockElements.add('BLOCKQUOTE'); kBlockElements.add('br'); kBlockElements.add('BR'); kBlockElements.add('details'); kBlockElements.add('DETAILS'); kBlockElements.add('dialog'); kBlockElements.add('DIALOG'); kBlockElements.add('dd'); kBlockElements.add('DD'); kBlockElements.add('div'); kBlockElements.add('DIV'); kBlockElements.add('dl'); kBlockElements.add('DL'); kBlockElements.add('dt'); kBlockElements.add('DT'); kBlockElements.add('fieldset'); kBlockElements.add('FIELDSET'); kBlockElements.add('figcaption'); kBlockElements.add('FIGCAPTION'); kBlockElements.add('figure'); kBlockElements.add('FIGURE'); kBlockElements.add('footer'); kBlockElements.add('FOOTER'); kBlockElements.add('form'); kBlockElements.add('FORM'); kBlockElements.add('h1'); kBlockElements.add('H1'); kBlockElements.add('h2'); kBlockElements.add('H2'); kBlockElements.add('h3'); kBlockElements.add('H3'); kBlockElements.add('h4'); kBlockElements.add('H4'); kBlockElements.add('h5'); kBlockElements.add('H5'); kBlockElements.add('h6'); kBlockElements.add('H6'); kBlockElements.add('header'); kBlockElements.add('HEADER'); kBlockElements.add('hgroup'); kBlockElements.add('HGROUP'); kBlockElements.add('hr'); kBlockElements.add('HR'); kBlockElements.add('li'); kBlockElements.add('LI'); kBlockElements.add('main'); kBlockElements.add('MAIN'); kBlockElements.add('nav'); kBlockElements.add('NAV'); kBlockElements.add('ol'); kBlockElements.add('OL'); kBlockElements.add('p'); kBlockElements.add('P'); kBlockElements.add('pre'); kBlockElements.add('PRE'); kBlockElements.add('section'); kBlockElements.add('SECTION'); kBlockElements.add('table'); kBlockElements.add('TABLE'); kBlockElements.add('td'); kBlockElements.add('TD'); kBlockElements.add('tr'); kBlockElements.add('TR'); kBlockElements.add('ul'); kBlockElements.add('UL'); class DOMTokenList { constructor(valuesInit = [], afterUpdate = (() => null)) { this._set = new Set(valuesInit); this._afterUpdate = afterUpdate; } _validate(c) { if (/\s/.test(c)) { throw new Error(`DOMException in DOMTokenList.add: The token '${c}' contains HTML space characters, which are not valid in tokens.`); } } add(c) { this._validate(c); this._set.add(c); this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call } replace(c1, c2) { this._validate(c2); this._set.delete(c1); this._set.add(c2); this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call } remove(c) { this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call } toggle(c) { this._validate(c); if (this._set.has(c)) this._set.delete(c); else this._set.add(c); this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call } contains(c) { return this._set.has(c); } get length() { return this._set.size; } values() { return this._set.values(); } get value() { return Array.from(this._set.values()); } toString() { return Array.from(this._set.values()).join(' '); } } /** * HTMLElement, which contains a set of children. * * Note: this is a minimalist implementation, no complete tree * structure provided (no parentNode, nextSibling, * previousSibling etc). * @class HTMLElement * @extends {Node} */ export default class HTMLElement extends Node { /** * Creates an instance of HTMLElement. * @param keyAttrs id and class attribute * @param [rawAttrs] attributes in string * * @memberof HTMLElement */ constructor(tagName, keyAttrs, rawAttrs = '', parentNode) { super(parentNode); this.rawAttrs = rawAttrs; /** * Node Type declaration. */ this.nodeType = NodeType.ELEMENT_NODE; this.rawTagName = tagName; this.rawAttrs = rawAttrs || ''; this.id = keyAttrs.id || ''; this.childNodes = []; this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], (classList) => (this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call )); if (keyAttrs.id) { if (!rawAttrs) { this.rawAttrs = `id="${keyAttrs.id}"`; } } if (keyAttrs.class) { if (!rawAttrs) { const cls = `class="${this.classList.toString()}"`; if (this.rawAttrs) { this.rawAttrs += ` ${cls}`; } else { this.rawAttrs = cls; } } } } /** * Quote attribute values * @param attr attribute value * @returns {string} quoted value */ quoteAttribute(attr) { if (attr === null) { return "null"; } return JSON.stringify(attr.replace(/"/g, '"')); } /** * Remove current element */ remove() { if (this.parentNode) { const children = this.parentNode.childNodes; this.parentNode.childNodes = children.filter((child) => { return this !== child; }); } } /** * Remove Child element from childNodes array * @param {HTMLElement} node node to remove */ removeChild(node) { this.childNodes = this.childNodes.filter((child) => { return (child !== node); }); } /** * Exchanges given child with new child * @param {HTMLElement} oldNode node to exchange * @param {HTMLElement} newNode new node */ exchangeChild(oldNode, newNode) { const children = this.childNodes; this.childNodes = children.map((child) => { if (child === oldNode) { return newNode; } return child; }); } get tagName() { return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName; } get localName() { return this.rawTagName.toLowerCase(); } /** * Get escpaed (as-it) text value of current node and its children. * @return {string} text content */ get rawText() { return this.childNodes.reduce((pre, cur) => { return (pre += cur.rawText); }, ''); } get textContent() { return this.rawText; } set textContent(val) { const content = [new TextNode(val, this)]; this.childNodes = content; } /** * Get unescaped text value of current node and its children. * @return {string} text content */ get text() { return decode(this.rawText); } /** * Get structured Text (with '\n' etc.) * @return {string} structured text */ get structuredText() { let currentBlock = []; const blocks = [currentBlock]; function dfs(node) { if (node.nodeType === NodeType.ELEMENT_NODE) { if (kBlockElements.has(node.rawTagName)) { if (currentBlock.length > 0) { blocks.push(currentBlock = []); } node.childNodes.forEach(dfs); if (currentBlock.length > 0) { blocks.push(currentBlock = []); } } else { node.childNodes.forEach(dfs); } } else if (node.nodeType === NodeType.TEXT_NODE) { if (node.isWhitespace) { // Whitespace node, postponed output currentBlock.prependWhitespace = true; } else { let text = node.trimmedText; if (currentBlock.prependWhitespace) { text = ` ${text}`; currentBlock.prependWhitespace = false; } currentBlock.push(text); } } } dfs(this); return blocks.map((block) => { // Normalize each line's whitespace return block.join('').replace(/\s{2,}/g, ' '); }) .join('\n').replace(/\s+$/, ''); // trimRight; } toString() { const tag = this.rawTagName; if (tag) { // const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|')); // const is_void = void_tags.has(tag); const is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag); const attrs = this.rawAttrs ? ` ${this.rawAttrs}` : ''; if (is_void) { return `<${tag}${attrs}>`; } return `<${tag}${attrs}>${this.innerHTML}`; } return this.innerHTML; } get innerHTML() { return this.childNodes.map((child) => { return child.toString(); }).join(''); } set innerHTML(content) { //const r = parse(content, global.options); // TODO global.options ? const r = parse(content); this.childNodes = r.childNodes.length ? r.childNodes : [new TextNode(content, this)]; } set_content(content, options = {}) { if (content instanceof Node) { content = [content]; } else if (typeof content == 'string') { const r = parse(content, options); content = r.childNodes.length ? r.childNodes : [new TextNode(content, this)]; } this.childNodes = content; } replaceWith(...nodes) { const content = nodes.map((node) => { if (node instanceof Node) { return [node]; } else if (typeof node == 'string') { // const r = parse(content, global.options); // TODO global.options ? const r = parse(node); return r.childNodes.length ? r.childNodes : [new TextNode(node, this)]; } return []; }).flat(); const idx = this.parentNode.childNodes.findIndex((child) => { return child === this; }); this.parentNode.childNodes = [ ...this.parentNode.childNodes.slice(0, idx), ...content, ...this.parentNode.childNodes.slice(idx + 1), ]; } get outerHTML() { return this.toString(); } /** * Trim element from right (in block) after seeing pattern in a TextNode. * @param {RegExp} pattern pattern to find * @return {HTMLElement} reference to current node */ trimRight(pattern) { for (let i = 0; i < this.childNodes.length; i++) { const childNode = this.childNodes[i]; if (childNode.nodeType === NodeType.ELEMENT_NODE) { childNode.trimRight(pattern); } else { const index = childNode.rawText.search(pattern); if (index > -1) { childNode.rawText = childNode.rawText.substr(0, index); // trim all following nodes. this.childNodes.length = i + 1; } } } return this; } /** * Get DOM structure * @return {string} strucutre */ get structure() { const res = []; let indention = 0; function write(str) { res.push(' '.repeat(indention) + str); } function dfs(node) { const idStr = node.id ? (`#${node.id}`) : ''; const classStr = node.classList.length ? (`.${node.classList.value.join('.')}`) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call write(`${node.rawTagName}${idStr}${classStr}`); indention++; node.childNodes.forEach((childNode) => { if (childNode.nodeType === NodeType.ELEMENT_NODE) { dfs(childNode); } else if (childNode.nodeType === NodeType.TEXT_NODE) { if (!childNode.isWhitespace) { write('#text'); } } }); indention--; } dfs(this); return res.join('\n'); } /** * Remove whitespaces in this sub tree. * @return {HTMLElement} pointer to this */ removeWhitespace() { let o = 0; this.childNodes.forEach((node) => { if (node.nodeType === NodeType.TEXT_NODE) { if (node.isWhitespace) { return; } node.rawText = node.trimmedText; } else if (node.nodeType === NodeType.ELEMENT_NODE) { node.removeWhitespace(); } this.childNodes[o++] = node; }); this.childNodes.length = o; return this; } /** * Query CSS selector to find matching nodes. * @param {string} selector Simplified CSS selector * @return {HTMLElement[]} matching elements */ querySelectorAll(selector) { return selectAll(selector, this, { xmlMode: true, adapter: Matcher }); // let matcher: Matcher; // if (selector instanceof Matcher) { // matcher = selector; // matcher.reset(); // } else { // if (selector.includes(',')) { // const selectors = selector.split(','); // return Array.from(selectors.reduce((pre, cur) => { // const result = this.querySelectorAll(cur.trim()); // return result.reduce((p, c) => { // return p.add(c); // }, pre); // }, new Set())); // } // matcher = new Matcher(selector); // } // interface IStack { // 0: Node; // node // 1: number; // children // 2: boolean; // found flag // } // const stack = [] as IStack[]; // return this.childNodes.reduce((res, cur) => { // stack.push([cur, 0, false]); // while (stack.length) { // const state = arr_back(stack); // get last element // const el = state[0]; // if (state[1] === 0) { // // Seen for first time. // if (el.nodeType !== NodeType.ELEMENT_NODE) { // stack.pop(); // continue; // } // const html_el = el as HTMLElement; // state[2] = matcher.advance(html_el); // if (state[2]) { // if (matcher.matched) { // res.push(html_el); // res.push(...(html_el.querySelectorAll(selector))); // // no need to go further. // matcher.rewind(); // stack.pop(); // continue; // } // } // } // if (state[1] < el.childNodes.length) { // stack.push([el.childNodes[state[1]++], 0, false]); // } else { // if (state[2]) { // matcher.rewind(); // } // stack.pop(); // } // } // return res; // }, [] as HTMLElement[]); } /** * Query CSS Selector to find matching node. * @param {string} selector Simplified CSS selector * @return {HTMLElement} matching node */ querySelector(selector) { return selectOne(selector, this, { xmlMode: true, adapter: Matcher }); // let matcher: Matcher; // if (selector instanceof Matcher) { // matcher = selector; // matcher.reset(); // } else { // matcher = new Matcher(selector); // } // const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[]; // for (const node of this.childNodes) { // stack.push([node, 0, false]); // while (stack.length) { // const state = arr_back(stack); // const el = state[0]; // if (state[1] === 0) { // // Seen for first time. // if (el.nodeType !== NodeType.ELEMENT_NODE) { // stack.pop(); // continue; // } // state[2] = matcher.advance(el as HTMLElement); // if (state[2]) { // if (matcher.matched) { // return el as HTMLElement; // } // } // } // if (state[1] < el.childNodes.length) { // stack.push([el.childNodes[state[1]++], 0, false]); // } else { // if (state[2]) { // matcher.rewind(); // } // stack.pop(); // } // } // } // return null; } /** * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null. * @param selector a DOMString containing a selector list */ closest(selector) { const mapChild = new Map(); let el = this; let old = null; function findOne(test, elems) { let elem = null; for (let i = 0, l = elems.length; i < l && !elem; i++) { const el = elems[i]; if (test(el)) { elem = el; } else { const child = mapChild.get(el); if (child) { elem = findOne(test, [child]); } } } return elem; } while (el) { mapChild.set(el, old); old = el; el = el.parentNode; } el = this; while (el) { const e = selectOne(selector, el, { xmlMode: true, adapter: { ...Matcher, getChildren(node) { const child = mapChild.get(node); return child && [child]; }, getSiblings(node) { return [node]; }, findOne, findAll() { return []; } } }); if (e) { return e; } el = el.parentNode; } return null; } /** * Append a child node to childNodes * @param {Node} node node to append * @return {Node} node appended */ appendChild(node) { // node.parentNode = this; this.childNodes.push(node); node.parentNode = this; return node; } /** * Get first child node * @return {Node} first child node */ get firstChild() { return this.childNodes[0]; } /** * Get last child node * @return {Node} last child node */ get lastChild() { return arr_back(this.childNodes); } /** * Get attributes * @access private * @return {Object} parsed and unescaped attributes */ get attrs() { if (this._attrs) { return this._attrs; } this._attrs = {}; const attrs = this.rawAttributes; for (const key in attrs) { const val = attrs[key] || ''; this._attrs[key.toLowerCase()] = decode(val); } return this._attrs; } get attributes() { const ret_attrs = {}; const attrs = this.rawAttributes; for (const key in attrs) { const val = attrs[key] || ''; ret_attrs[key] = decode(val); } return ret_attrs; } /** * Get escaped (as-it) attributes * @return {Object} parsed attributes */ get rawAttributes() { if (this._rawAttrs) { return this._rawAttrs; } const attrs = {}; if (this.rawAttrs) { const re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig; let match; while ((match = re.exec(this.rawAttrs))) { attrs[match[1]] = match[2] || match[3] || match[4] || null; } } this._rawAttrs = attrs; return attrs; } removeAttribute(key) { const attrs = this.rawAttributes; delete attrs[key]; // Update this.attribute if (this._attrs) { delete this._attrs[key]; } // Update rawString this.rawAttrs = Object.keys(attrs).map((name) => { const val = JSON.stringify(attrs[name]); if (val === undefined || val === 'null') { return name; } return `${name}=${val}`; }).join(' '); // Update this.id if (key === 'id') { this.id = ''; } } hasAttribute(key) { return key.toLowerCase() in this.attrs; } /** * Get an attribute * @return {string} value of the attribute */ getAttribute(key) { return this.attrs[key.toLowerCase()]; } /** * Set an attribute value to the HTMLElement * @param {string} key The attribute name * @param {string} value The value to set, or null / undefined to remove an attribute */ setAttribute(key, value) { if (arguments.length < 2) { throw new Error('Failed to execute \'setAttribute\' on \'Element\''); } const k2 = key.toLowerCase(); const attrs = this.rawAttributes; for (const k in attrs) { if (k.toLowerCase() === k2) { key = k; break; } } attrs[key] = String(value); // update this.attrs if (this._attrs) { this._attrs[k2] = decode(attrs[key]); } // Update rawString this.rawAttrs = Object.keys(attrs).map((name) => { const val = this.quoteAttribute(attrs[name]); if (val === 'null' || val === '""') { return name; } return `${name}=${val}`; }).join(' '); // Update this.id if (key === 'id') { this.id = value; } } /** * Replace all the attributes of the HTMLElement by the provided attributes * @param {Attributes} attributes the new attribute set */ setAttributes(attributes) { // Invalidate current this.attributes if (this._attrs) { delete this._attrs; } // Invalidate current this.rawAttributes if (this._rawAttrs) { delete this._rawAttrs; } // Update rawString this.rawAttrs = Object.keys(attributes).map((name) => { const val = attributes[name]; if (val === 'null' || val === '""') { return name; } return `${name}=${this.quoteAttribute(String(val))}`; }).join(' '); } insertAdjacentHTML(where, html) { if (arguments.length < 2) { throw new Error('2 arguments required'); } const p = parse(html); if (where === 'afterend') { const idx = this.parentNode.childNodes.findIndex((child) => { return child === this; }); this.parentNode.childNodes.splice(idx + 1, 0, ...p.childNodes); p.childNodes.forEach((n) => { if (n instanceof HTMLElement) { n.parentNode = this.parentNode; } }); } else if (where === 'afterbegin') { this.childNodes.unshift(...p.childNodes); } else if (where === 'beforeend') { p.childNodes.forEach((n) => { this.appendChild(n); }); } else if (where === 'beforebegin') { const idx = this.parentNode.childNodes.findIndex((child) => { return child === this; }); this.parentNode.childNodes.splice(idx, 0, ...p.childNodes); p.childNodes.forEach((n) => { if (n instanceof HTMLElement) { n.parentNode = this.parentNode; } }); } else { throw new Error(`The value provided ('${where}') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'`); } // if (!where || html === undefined || html === null) { // return; // } } get nextSibling() { if (this.parentNode) { const children = this.parentNode.childNodes; let i = 0; while (i < children.length) { const child = children[i++]; if (this === child) { return children[i] || null; } } return null; } } get nextElementSibling() { if (this.parentNode) { const children = this.parentNode.childNodes; let i = 0; let find = false; while (i < children.length) { const child = children[i++]; if (find) { if (child instanceof HTMLElement) { return child || null; } } else if (this === child) { find = true; } } return null; } } get classNames() { return this.classList.toString(); } } // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name const kMarkupPattern = /)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig; // <(?[^\s]*)(.*)>(.*)> // <([a-z][-.:0-9_a-z]*)\s*\/> // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?> // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?[^\s]*)(.*)>(.*)> const kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig; const kSelfClosingElements = { area: true, AREA: true, base: true, BASE: true, br: true, BR: true, col: true, COL: true, hr: true, HR: true, img: true, IMG: true, input: true, INPUT: true, link: true, LINK: true, meta: true, META: true, source: true, SOURCE: true, embed: true, EMBED: true, param: true, PARAM: true, track: true, TRACK: true, wbr: true, WBR: true }; const kElementsClosedByOpening = { li: { li: true, LI: true }, LI: { li: true, LI: true }, p: { p: true, div: true, P: true, DIV: true }, P: { p: true, div: true, P: true, DIV: true }, b: { div: true, DIV: true }, B: { div: true, DIV: true }, td: { td: true, th: true, TD: true, TH: true }, TD: { td: true, th: true, TD: true, TH: true }, th: { td: true, th: true, TD: true, TH: true }, TH: { td: true, th: true, TD: true, TH: true }, h1: { h1: true, H1: true }, H1: { h1: true, H1: true }, h2: { h2: true, H2: true }, H2: { h2: true, H2: true }, h3: { h3: true, H3: true }, H3: { h3: true, H3: true }, h4: { h4: true, H4: true }, H4: { h4: true, H4: true }, h5: { h5: true, H5: true }, H5: { h5: true, H5: true }, h6: { h6: true, H6: true }, H6: { h6: true, H6: true } }; const kElementsClosedByClosing = { li: { ul: true, ol: true, UL: true, OL: true }, LI: { ul: true, ol: true, UL: true, OL: true }, a: { div: true, DIV: true }, A: { div: true, DIV: true }, b: { div: true, DIV: true }, B: { div: true, DIV: true }, i: { div: true, DIV: true }, I: { div: true, DIV: true }, p: { div: true, DIV: true }, P: { div: true, DIV: true }, td: { tr: true, table: true, TR: true, TABLE: true }, TD: { tr: true, table: true, TR: true, TABLE: true }, th: { tr: true, table: true, TR: true, TABLE: true }, TH: { tr: true, table: true, TR: true, TABLE: true } }; const frameflag = 'documentfragmentcontainer'; /** * Parses HTML and returns a root element * Parse a chuck of HTML source. * @param {string} data html * @return {HTMLElement} root element */ export function base_parse(data, options = { lowerCaseTagName: false, comment: false }) { const elements = options.blockTextElements || { script: true, noscript: true, style: true, pre: true }; const element_names = Object.keys(elements); const kBlockTextElements = element_names.map((it) => { return new RegExp(it, 'i'); }); const kIgnoreElements = element_names.filter((it) => { return elements[it]; }).map((it) => { return new RegExp(it, 'i'); }); function element_should_be_ignore(tag) { return kIgnoreElements.some((it) => { return it.test(tag); }); } function is_block_text_element(tag) { return kBlockTextElements.some((it) => { return it.test(tag); }); } const root = new HTMLElement(null, {}, '', null); let currentParent = root; const stack = [root]; let lastTextPos = -1; let match; // https://github.com/taoqf/node-html-parser/issues/38 data = `<${frameflag}>${data}`; while ((match = kMarkupPattern.exec(data))) { if (lastTextPos > -1) { if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) { // if has content const text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length); currentParent.appendChild(new TextNode(text, currentParent)); } } lastTextPos = kMarkupPattern.lastIndex; if (match[2] === frameflag) { continue; } if (match[0][1] === '!') { // this is a comment if (options.comment) { // Only keep what is in between const text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4); currentParent.appendChild(new CommentNode(text, currentParent)); } continue; } if (options.lowerCaseTagName) { match[2] = match[2].toLowerCase(); } if (!match[1]) { // not or ... const closeMarkup = ``; const index = (() => { if (options.lowerCaseTagName) { return data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex); } return data.indexOf(closeMarkup, kMarkupPattern.lastIndex); })(); if (element_should_be_ignore(match[2])) { let text; if (index === -1) { // there is no matching ending for the text element. text = data.substr(kMarkupPattern.lastIndex); } else { text = data.substring(kMarkupPattern.lastIndex, index); } if (text.length > 0) { currentParent.appendChild(new TextNode(text, currentParent)); } } if (index === -1) { lastTextPos = kMarkupPattern.lastIndex = data.length + 1; } else { lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length; match[1] = 'true'; } } } if (match[1] || match[4] || kSelfClosingElements[match[2]]) { // or
etc. while (true) { if (currentParent.rawTagName === match[2]) { stack.pop(); currentParent = arr_back(stack); break; } else { const tagName = currentParent.tagName; // Trying to close current tag, and move on if (kElementsClosedByClosing[tagName]) { if (kElementsClosedByClosing[tagName][match[2]]) { stack.pop(); currentParent = arr_back(stack); continue; } } // Use aggressive strategy to handle unmatching markups. break; } } } } return stack; } /** * Parses HTML and returns a root element * Parse a chuck of HTML source. */ export function parse(data, options = { lowerCaseTagName: false, comment: false }) { const stack = base_parse(data, options); const [root] = stack; while (stack.length > 1) { // Handle each error elements. const last = stack.pop(); const oneBefore = arr_back(stack); if (last.parentNode && last.parentNode.parentNode) { if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) { // Pair error case

handle : Fixes to

oneBefore.removeChild(last); last.childNodes.forEach((child) => { oneBefore.parentNode.appendChild(child); }); stack.pop(); } else { // Single error

handle: Just removes

oneBefore.removeChild(last); last.childNodes.forEach((child) => { oneBefore.appendChild(child); }); } } else { // If it's final element just skip. } } // response.childNodes.forEach((node) => { // if (node instanceof HTMLElement) { // node.parentNode = null; // } // }); return root; }