parse.js 21 KB


  1. const util = require('./util')
  // Module-level parser state. parse() re-initialises every one of these on
  // each call, so nothing carries over from one parse to the next.

  // Input text and the read cursor into it.
  let source
  let pos
  let line
  let column

  // Name of the active parseStates handler and the stack of open containers.
  let parseState
  let stack

  // Most recent token from lex(), the pending property name, and the result.
  let token
  let key
  let root
  11. module.exports = function parse (text, reviver) {
  12. source = String(text)
  13. parseState = 'start'
  14. stack = []
  15. pos = 0
  16. line = 1
  17. column = 0
  18. token = undefined
  19. key = undefined
  20. root = undefined
  21. do {
  22. token = lex()
  23. // This code is unreachable.
  24. // if (!parseStates[parseState]) {
  25. // throw invalidParseState()
  26. // }
  27. parseStates[parseState]()
  28. } while (token.type !== 'eof')
  29. if (typeof reviver === 'function') {
  30. return internalize({'': root}, '', reviver)
  31. }
  32. return root
  33. }
  /**
   * Recursively applies `reviver` to `holder[name]`, children first.
   *
   * Only the value's own enumerable keys are visited (snapshotted with
   * Object.keys), so properties inherited through the prototype chain are
   * never passed to the reviver nor copied onto the value.
   *
   * @param {Object} holder - Object (or array) that contains the value.
   * @param {string} name - Key of the value inside `holder`.
   * @param {Function} reviver - Called as reviver.call(holder, name, value).
   * @returns {*} Whatever the reviver returns for this value.
   */
  function internalize (holder, name, reviver) {
    const value = holder[name]
    if (value != null && typeof value === 'object') {
      for (const key of Object.keys(value)) {
        const replacement = internalize(value, key, reviver)
        if (replacement === undefined) {
          delete value[key]
        } else {
          // Define the property rather than assigning it: a plain assignment
          // to a key such as `__proto__` could go through the inherited setter
          // and swap the prototype. Reflect.defineProperty reports failure by
          // returning false instead of throwing.
          Reflect.defineProperty(value, key, {
            value: replacement,
            writable: true,
            enumerable: true,
            configurable: true,
          })
        }
      }
    }

    return reviver.call(holder, name, value)
  }
  // Module-level lexer state. lex() resets lexState, buffer, doubleQuote and
  // sign at the start of every token and refreshes `c` before each step.

  // Name of the active lexStates handler.
  let lexState

  // Text accumulated for the token currently being scanned.
  let buffer

  // True when the string being scanned was opened with a double quote.
  let doubleQuote

  // Multiplier for numeric tokens: 1, or -1 after a leading '-'.
  let sign

  // Lookahead code point from peek(); undefined at end of input.
  let c
  /**
   * Scans the next token from `source`.
   *
   * Resets the per-token lexer state, then keeps running the handler named by
   * `lexState` (with the lookahead stored in `c`) until one returns a token.
   *
   * @returns {Object} The token built by newToken().
   */
  function lex () {
    lexState = 'default'
    buffer = ''
    doubleQuote = false
    sign = 1

    let result
    do {
      c = peek()
      // Every state name assigned in this file exists in lexStates, so the
      // lookup is not guarded.
      result = lexStates[lexState]()
    } while (!result)

    return result
  }
  /**
   * Returns the code point at the current position without consuming it.
   *
   * @returns {string|undefined} A string holding one full code point (two
   *   UTF-16 units for a surrogate pair), or undefined at end of input.
   */
  function peek () {
    if (!source[pos]) {
      return undefined
    }

    const codePoint = source.codePointAt(pos)
    return String.fromCodePoint(codePoint)
  }
  /**
   * Consumes the next code point and updates `pos`, `line` and `column`.
   *
   * @returns {string|undefined} The consumed code point, or undefined at end
   *   of input.
   */
  function read () {
    const ch = peek()

    if (!ch) {
      // End of input: nothing to consume, but the column still moves on so
      // that error positions point just past the last character.
      column++
      return ch
    }

    if (ch === '\n') {
      line++
      column = 0
    } else {
      // Counted in UTF-16 units, so a surrogate pair advances the column by 2.
      column += ch.length
    }

    pos += ch.length
    return ch
  }
  /**
   * Lexer state table, keyed by state name.
   *
   * lex() repeatedly calls the handler named by `lexState` with the lookahead
   * code point in `c`. A handler either returns a token, which ends the lex()
   * call, or returns undefined after consuming input and/or switching
   * `lexState`.
   *
   * The handlers from `start` to `end` carry the same names as the parser
   * states; `default` dispatches to the one matching the current `parseState`.
   */
  const lexStates = {
    // Skips white space, recognises comment openers and end of input, and
    // otherwise hands over to the handler named after the parser state.
    default () {
      if (c === undefined) {
        read()
        return newToken('eof')
      }

      if (c === '/') {
        read()
        lexState = 'comment'
        return
      }

      switch (c) {
        case '\t':
        case '\v':
        case '\f':
        case ' ':
        case '\u00A0':
        case '\uFEFF':
        case '\n':
        case '\r':
        case '\u2028':
        case '\u2029':
          read()
          return
      }

      if (util.isSpaceSeparator(c)) {
        read()
        return
      }

      // Every parser state name has a handler in this table, so the lookup
      // is not guarded.
      return lexStates[parseState]()
    },

    // After a single '/': decide between a block and a line comment.
    comment () {
      if (c === '*') {
        read()
        lexState = 'multiLineComment'
        return
      }

      if (c === '/') {
        read()
        lexState = 'singleLineComment'
        return
      }

      throw invalidChar(read())
    },

    // Inside a block comment; running out of input here is an error.
    multiLineComment () {
      if (c === undefined) {
        throw invalidChar(read())
      }

      if (c === '*') {
        read()
        lexState = 'multiLineCommentAsterisk'
        return
      }

      read()
    },

    // Inside a block comment, directly after a '*'.
    multiLineCommentAsterisk () {
      if (c === undefined) {
        throw invalidChar(read())
      }

      if (c === '*') {
        // Still a candidate terminator; remain in this state.
        read()
        return
      }

      if (c === '/') {
        read()
        lexState = 'default'
        return
      }

      read()
      lexState = 'multiLineComment'
    },

    // Inside a line comment; it ends at a line terminator or at end of input.
    singleLineComment () {
      if (c === undefined) {
        read()
        return newToken('eof')
      }

      const endsLine = c === '\n' || c === '\r' || c === '\u2028' || c === '\u2029'
      read()
      if (endsLine) {
        lexState = 'default'
      }
    },

    // Start of any value: container opener, literal, number or string.
    value () {
      switch (c) {
        case '{':
        case '[':
          return newToken('punctuator', read())

        case 'n':
          read()
          literal('ull')
          return newToken('null', null)

        case 't':
          read()
          literal('rue')
          return newToken('boolean', true)

        case 'f':
          read()
          literal('alse')
          return newToken('boolean', false)

        case '-':
        case '+':
          if (read() === '-') {
            sign = -1
          }
          lexState = 'sign'
          return

        case '.':
          buffer = read()
          lexState = 'decimalPointLeading'
          return

        case '0':
          buffer = read()
          lexState = 'zero'
          return

        case '1': case '2': case '3':
        case '4': case '5': case '6':
        case '7': case '8': case '9':
          buffer = read()
          lexState = 'decimalInteger'
          return

        case 'I':
          read()
          literal('nfinity')
          return newToken('numeric', Infinity)

        case 'N':
          read()
          literal('aN')
          return newToken('numeric', NaN)

        case '"':
        case "'":
          doubleQuote = (read() === '"')
          buffer = ''
          lexState = 'string'
          return
      }

      throw invalidChar(read())
    },

    // After a backslash at the start of an identifier: only \uXXXX is allowed
    // and it must decode to a valid identifier start character.
    identifierNameStartEscape () {
      if (c !== 'u') {
        throw invalidChar(read())
      }

      read()
      const u = unicodeEscape()
      const isStart = u === '$' || u === '_' || util.isIdStartChar(u)
      if (!isStart) {
        throw invalidIdentifier()
      }

      buffer += u
      lexState = 'identifierName'
    },

    // Continuation of an unquoted property name.
    identifierName () {
      if (c === '\\') {
        read()
        lexState = 'identifierNameEscape'
        return
      }

      const isPart =
        c === '$' ||
        c === '_' ||
        c === '\u200C' ||
        c === '\u200D' ||
        util.isIdContinueChar(c)
      if (isPart) {
        buffer += read()
        return
      }

      // The lookahead is not consumed; it belongs to the next token.
      return newToken('identifier', buffer)
    },

    // After a backslash inside an identifier: only \uXXXX is allowed and it
    // must decode to a valid identifier continuation character.
    identifierNameEscape () {
      if (c !== 'u') {
        throw invalidChar(read())
      }

      read()
      const u = unicodeEscape()
      const isPart =
        u === '$' ||
        u === '_' ||
        u === '\u200C' ||
        u === '\u200D' ||
        util.isIdContinueChar(u)
      if (!isPart) {
        throw invalidIdentifier()
      }

      buffer += u
      lexState = 'identifierName'
    },

    // After a leading '+' or '-': a number, Infinity or NaN must follow.
    sign () {
      switch (c) {
        case '.':
          buffer = read()
          lexState = 'decimalPointLeading'
          return

        case '0':
          buffer = read()
          lexState = 'zero'
          return

        case '1': case '2': case '3':
        case '4': case '5': case '6':
        case '7': case '8': case '9':
          buffer = read()
          lexState = 'decimalInteger'
          return

        case 'I':
          read()
          literal('nfinity')
          return newToken('numeric', sign * Infinity)

        case 'N':
          read()
          literal('aN')
          return newToken('numeric', NaN)
      }

      throw invalidChar(read())
    },

    // After a leading '0': a fraction, an exponent, a hex prefix, or plain zero.
    zero () {
      if (c === '.') {
        buffer += read()
        lexState = 'decimalPoint'
        return
      }

      if (c === 'e' || c === 'E') {
        buffer += read()
        lexState = 'decimalExponent'
        return
      }

      if (c === 'x' || c === 'X') {
        buffer += read()
        lexState = 'hexadecimal'
        return
      }

      // Multiplying by sign keeps the distinction between 0 and -0.
      return newToken('numeric', sign * 0)
    },

    // Integer part of a decimal number.
    decimalInteger () {
      if (c === '.') {
        buffer += read()
        lexState = 'decimalPoint'
        return
      }

      if (c === 'e' || c === 'E') {
        buffer += read()
        lexState = 'decimalExponent'
        return
      }

      if (util.isDigit(c)) {
        buffer += read()
        return
      }

      return newToken('numeric', sign * Number(buffer))
    },

    // After a '.' with no integer part: at least one digit is required.
    decimalPointLeading () {
      if (!util.isDigit(c)) {
        throw invalidChar(read())
      }

      buffer += read()
      lexState = 'decimalFraction'
    },

    // After a '.' that followed an integer part; a trailing point is allowed.
    decimalPoint () {
      if (c === 'e' || c === 'E') {
        buffer += read()
        lexState = 'decimalExponent'
        return
      }

      if (util.isDigit(c)) {
        buffer += read()
        lexState = 'decimalFraction'
        return
      }

      return newToken('numeric', sign * Number(buffer))
    },

    // Digits after the decimal point.
    decimalFraction () {
      if (c === 'e' || c === 'E') {
        buffer += read()
        lexState = 'decimalExponent'
        return
      }

      if (util.isDigit(c)) {
        buffer += read()
        return
      }

      return newToken('numeric', sign * Number(buffer))
    },

    // After 'e' or 'E': an optional sign, then digits.
    decimalExponent () {
      if (c === '+' || c === '-') {
        buffer += read()
        lexState = 'decimalExponentSign'
        return
      }

      if (util.isDigit(c)) {
        buffer += read()
        lexState = 'decimalExponentInteger'
        return
      }

      throw invalidChar(read())
    },

    // After the exponent sign: at least one digit is required.
    decimalExponentSign () {
      if (!util.isDigit(c)) {
        throw invalidChar(read())
      }

      buffer += read()
      lexState = 'decimalExponentInteger'
    },

    // Digits of the exponent.
    decimalExponentInteger () {
      if (util.isDigit(c)) {
        buffer += read()
        return
      }

      return newToken('numeric', sign * Number(buffer))
    },

    // After '0x' or '0X': at least one hex digit is required.
    hexadecimal () {
      if (!util.isHexDigit(c)) {
        throw invalidChar(read())
      }

      buffer += read()
      lexState = 'hexadecimalInteger'
    },

    // Digits of a hexadecimal number; buffer still holds the '0x' prefix, so
    // Number() interprets it as hex.
    hexadecimalInteger () {
      if (util.isHexDigit(c)) {
        buffer += read()
        return
      }

      return newToken('numeric', sign * Number(buffer))
    },

    // Body of a quoted string.
    string () {
      if (c === '\\') {
        read()
        buffer += escape()
        return
      }

      if (c === '"' || c === "'") {
        // Only the quote that opened the string closes it; the other kind is
        // ordinary content.
        const closesString = c === '"' ? doubleQuote : !doubleQuote
        if (closesString) {
          read()
          return newToken('string', buffer)
        }
        buffer += read()
        return
      }

      if (c === '\n' || c === '\r' || c === undefined) {
        throw invalidChar(read())
      }

      if (c === '\u2028' || c === '\u2029') {
        // Accepted, but warned about.
        separatorChar(c)
      }

      buffer += read()
    },

    // Top level: containers are tokenised here, everything else is left to
    // `value`. End of input never gets this far because `default` handles it.
    start () {
      if (c === '{' || c === '[') {
        return newToken('punctuator', read())
      }

      lexState = 'value'
    },

    // Inside an object where a property name, or the closing '}', is expected.
    beforePropertyName () {
      switch (c) {
        case '$':
        case '_':
          buffer = read()
          lexState = 'identifierName'
          return

        case '\\':
          read()
          lexState = 'identifierNameStartEscape'
          return

        case '}':
          return newToken('punctuator', read())

        case '"':
        case "'":
          doubleQuote = (read() === '"')
          lexState = 'string'
          return
      }

      if (util.isIdStartChar(c)) {
        buffer += read()
        lexState = 'identifierName'
        return
      }

      throw invalidChar(read())
    },

    // After a property name only ':' is acceptable.
    afterPropertyName () {
      if (c !== ':') {
        throw invalidChar(read())
      }

      return newToken('punctuator', read())
    },

    // A property value is lexed like any other value.
    beforePropertyValue () {
      lexState = 'value'
    },

    // After a property value only ',' or '}' is acceptable.
    afterPropertyValue () {
      if (c === ',' || c === '}') {
        return newToken('punctuator', read())
      }

      throw invalidChar(read())
    },

    // Inside an array where an element, or the closing ']', is expected.
    beforeArrayValue () {
      if (c === ']') {
        return newToken('punctuator', read())
      }

      lexState = 'value'
    },

    // After an array element only ',' or ']' is acceptable.
    afterArrayValue () {
      if (c === ',' || c === ']') {
        return newToken('punctuator', read())
      }

      throw invalidChar(read())
    },

    // After the root value nothing but white space and comments may follow.
    // End of input itself is handled by `default`, so whatever reaches this
    // handler is an error.
    end () {
      throw invalidChar(read())
    },
  }
  /**
   * Builds a token stamped with the lexer's current line and column.
   *
   * @param {string} type - Token type, e.g. 'punctuator', 'string' or 'eof'.
   * @param {*} [value] - Token payload; left undefined when not supplied.
   * @returns {{type: string, value: *, line: number, column: number}}
   */
  function newToken (type, value) {
    const tok = { type, value, line, column }
    return tok
  }
  /**
   * Consumes the exact character sequence `s` from the input.
   *
   * @param {string} s - Remaining characters of a keyword (e.g. 'ull').
   * @throws {SyntaxError} At the first character that does not match.
   */
  function literal (s) {
    for (const expected of s) {
      if (peek() !== expected) {
        throw invalidChar(read())
      }
      read()
    }
  }
  /**
   * Decodes the escape sequence that follows a backslash inside a string.
   * The backslash itself has already been consumed by the caller.
   *
   * @returns {string} The decoded text; empty for a line continuation.
   * @throws {SyntaxError} For a decimal digit escape other than a lone `\0`,
   *   a malformed hex or unicode escape, or end of input.
   */
  function escape () {
    const ch = peek()

    // Single-character escapes that map straight to a control character.
    const simpleEscapes = { b: '\b', f: '\f', n: '\n', r: '\r', t: '\t', v: '\v' }
    if (ch !== undefined && Object.prototype.hasOwnProperty.call(simpleEscapes, ch)) {
      read()
      return simpleEscapes[ch]
    }

    switch (ch) {
      case '0':
        read()
        // `\0` is NUL only when no further digit follows.
        if (util.isDigit(peek())) {
          throw invalidChar(read())
        }
        return '\0'

      case 'x':
        read()
        return hexEscape()

      case 'u':
        read()
        return unicodeEscape()

      // Line continuations: backslash plus a line terminator yields nothing.
      case '\n':
      case '\u2028':
      case '\u2029':
        read()
        return ''

      case '\r':
        read()
        // Treat CR LF as one terminator.
        if (peek() === '\n') {
          read()
        }
        return ''

      case '1': case '2': case '3':
      case '4': case '5': case '6':
      case '7': case '8': case '9':
      case undefined:
        throw invalidChar(read())
    }

    // Any other character stands for itself.
    return read()
  }
  /**
   * Reads the two hex digits of a `\xHH` escape and decodes them.
   *
   * @returns {string} The character with that code.
   * @throws {SyntaxError} If either of the two characters is not a hex digit.
   */
  function hexEscape () {
    let digits = ''

    for (let i = 0; i < 2; i++) {
      if (!util.isHexDigit(peek())) {
        throw invalidChar(read())
      }
      digits += read()
    }

    return String.fromCodePoint(parseInt(digits, 16))
  }
  /**
   * Reads the four hex digits of a `\uHHHH` escape and decodes them.
   *
   * @returns {string} The character with that code.
   * @throws {SyntaxError} If any of the four characters is not a hex digit.
   */
  function unicodeEscape () {
    let digits = ''

    for (let remaining = 4; remaining > 0; remaining--) {
      if (!util.isHexDigit(peek())) {
        throw invalidChar(read())
      }
      digits += read()
    }

    return String.fromCodePoint(parseInt(digits, 16))
  }
  /**
   * Parser state table, keyed by state name. parse() calls the handler named
   * by `parseState` once for every token returned by lex().
   *
   * The lexer state of the same name has already rejected characters that are
   * not valid at that point, so the handlers here only tell the remaining
   * token kinds apart and report a premature end of input.
   */
  const parseStates = {
    // The first token must begin the root value.
    start () {
      if (token.type === 'eof') {
        throw invalidEOF()
      }

      push()
    },

    // Inside an object: a property name or the closing brace.
    beforePropertyName () {
      const type = token.type

      if (type === 'eof') {
        throw invalidEOF()
      }

      if (type === 'punctuator') {
        // The lexer only lets '}' through here.
        pop()
        return
      }

      if (type === 'identifier' || type === 'string') {
        key = token.value
        parseState = 'afterPropertyName'
      }
    },

    // The lexer only lets ':' through here.
    afterPropertyName () {
      if (token.type === 'eof') {
        throw invalidEOF()
      }

      parseState = 'beforePropertyValue'
    },

    // Any value token becomes the value of the pending property.
    beforePropertyValue () {
      if (token.type === 'eof') {
        throw invalidEOF()
      }

      push()
    },

    // Inside an array: an element or the closing bracket.
    beforeArrayValue () {
      if (token.type === 'eof') {
        throw invalidEOF()
      }

      const closesArray = token.type === 'punctuator' && token.value === ']'
      if (closesArray) {
        pop()
        return
      }

      push()
    },

    // The lexer only lets ',' and '}' through here.
    afterPropertyValue () {
      if (token.type === 'eof') {
        throw invalidEOF()
      }

      if (token.value === ',') {
        parseState = 'beforePropertyName'
      } else if (token.value === '}') {
        pop()
      }
    },

    // The lexer only lets ',' and ']' through here.
    afterArrayValue () {
      if (token.type === 'eof') {
        throw invalidEOF()
      }

      if (token.value === ',') {
        parseState = 'beforeArrayValue'
      } else if (token.value === ']') {
        pop()
      }
    },

    // Only the eof token reaches this state (the lexer rejects anything else
    // after the root value), so there is nothing left to do.
    end () {
    },
  }
  /**
   * Turns the current token into a value, attaches it to the document and
   * selects the next parser state.
   *
   * The value becomes the root if there is none yet. Otherwise it is appended
   * to the array on top of the stack, or stored under the pending `key` of the
   * object on top of the stack. New containers are pushed on the stack.
   *
   * Object members are created with Object.defineProperty instead of plain
   * assignment. Assigning to a key named `__proto__` would call the inherited
   * setter and replace the object's prototype; defining the property always
   * creates an ordinary own data property.
   */
  function push () {
    let value

    switch (token.type) {
      case 'punctuator':
        if (token.value === '{') {
          value = {}
        } else if (token.value === '[') {
          value = []
        }
        break

      case 'null':
      case 'boolean':
      case 'numeric':
      case 'string':
        value = token.value
        break

      // No other token type reaches push(); the lexer rejects such input.
    }

    if (root === undefined) {
      root = value
    } else {
      const parent = stack[stack.length - 1]
      if (Array.isArray(parent)) {
        parent.push(value)
      } else {
        // A repeated key redefines the property, so the last one still wins.
        Object.defineProperty(parent, key, {
          value,
          writable: true,
          enumerable: true,
          configurable: true,
        })
      }
    }

    if (value !== null && typeof value === 'object') {
      // A container was opened: descend into it.
      stack.push(value)
      parseState = Array.isArray(value) ? 'beforeArrayValue' : 'beforePropertyName'
      return
    }

    // A primitive was added: continue after it in the enclosing container, or
    // finish when it was the root itself.
    const current = stack[stack.length - 1]
    if (current == null) {
      parseState = 'end'
    } else if (Array.isArray(current)) {
      parseState = 'afterArrayValue'
    } else {
      parseState = 'afterPropertyValue'
    }
  }
  /**
   * Closes the innermost open container and selects the parser state that
   * follows a completed value in whatever now encloses it.
   */
  function pop () {
    stack.pop()

    const enclosing = stack[stack.length - 1]
    if (enclosing == null) {
      // The root container was closed.
      parseState = 'end'
      return
    }

    parseState = Array.isArray(enclosing) ? 'afterArrayValue' : 'afterPropertyValue'
  }
  818. // This code is unreachable.
  819. // function invalidParseState () {
  820. // return new Error(`JSON5: invalid parse state '${parseState}'`)
  821. // }
  822. // This code is unreachable.
  823. // function invalidLexState (state) {
  824. // return new Error(`JSON5: invalid lex state '${state}'`)
  825. // }
  /**
   * Builds the error for an unexpected character at the current position.
   *
   * @param {string|undefined} c - The offending character, or undefined when
   *   the input ended unexpectedly.
   * @returns {SyntaxError} The error, for the caller to throw.
   */
  function invalidChar (c) {
    const message = c === undefined
      ? `JSON5: invalid end of input at ${line}:${column}`
      : `JSON5: invalid character '${formatChar(c)}' at ${line}:${column}`

    return syntaxError(message)
  }
  /**
   * Builds the error for input that ends before the document is complete.
   *
   * @returns {SyntaxError} The error, for the caller to throw.
   */
  function invalidEOF () {
    const message = `JSON5: invalid end of input at ${line}:${column}`
    return syntaxError(message)
  }
  835. // This code is unreachable.
  836. // function invalidToken () {
  837. // if (token.type === 'eof') {
  838. // return syntaxError(`JSON5: invalid end of input at ${line}:${column}`)
  839. // }
  840. // const c = String.fromCodePoint(token.value.codePointAt(0))
  841. // return syntaxError(`JSON5: invalid character '${formatChar(c)}' at ${line}:${column}`)
  842. // }
  /**
   * Builds the error for a unicode escape that decodes to a character not
   * allowed in an identifier.
   *
   * Both callers invoke this right after consuming `u` and four hex digits,
   * so the column is moved back by those five characters before reporting.
   * The module-level `column` stays at the adjusted value.
   *
   * @returns {SyntaxError} The error, for the caller to throw.
   */
  function invalidIdentifier () {
    column = column - 5
    const message = `JSON5: invalid identifier character at ${line}:${column}`
    return syntaxError(message)
  }
  /**
   * Warns on the console about a character that this parser accepts inside a
   * string although, per the message, it is not valid ECMAScript there.
   *
   * @param {string} c - The character being reported.
   */
  function separatorChar (c) {
    const shown = formatChar(c)
    console.warn(`JSON5: '${shown}' in strings is not valid ECMAScript; consider escaping`)
  }
  /**
   * Renders a character for use inside an error or warning message.
   *
   * Quotes, the backslash, common control characters and the two Unicode line
   * separators are shown as their escape sequences; any other character below
   * U+0020 is shown as `\xHH`; everything else is returned unchanged.
   *
   * @param {string} c - The character to render.
   * @returns {string} A printable representation of `c`.
   */
  function formatChar (c) {
    const replacements = {
      "'": "\\'",
      '"': '\\"',
      '\\': '\\\\',
      '\b': '\\b',
      '\f': '\\f',
      '\n': '\\n',
      '\r': '\\r',
      '\t': '\\t',
      '\v': '\\v',
      '\0': '\\0',
      '\u2028': '\\u2028',
      '\u2029': '\\u2029',
    }

    const named = replacements[c]
    if (named) {
      return named
    }

    if (c < ' ') {
      // Remaining control characters: two-digit, zero-padded hex escape.
      const hex = c.charCodeAt(0).toString(16)
      return '\\x' + ('00' + hex).slice(-2)
    }

    return c
  }
  /**
   * Creates a SyntaxError annotated with the current input position.
   *
   * @param {string} message - Error message.
   * @returns {SyntaxError} Error carrying `lineNumber` and `columnNumber`.
   */
  function syntaxError (message) {
    const position = { lineNumber: line, columnNumber: column }
    return Object.assign(new SyntaxError(message), position)
  }