decoder.js 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  /**
   * Extracts the six payload bits of a UTF-8 continuation byte (10xxxxxx).
   *
   * @param {number} b - The byte to inspect. `undefined` (a sequence that ends
   *   too early) is rejected because it does not match the 10xxxxxx pattern.
   * @returns {number} The low six bits of `b`.
   * @throws {Error} If `b` is not a continuation byte.
   */
  function con(b) {
    if ((b & 0xc0) !== 0x80) {
      throw new Error("invalid UTF-8 encoding");
    }
    return b & 0x3f;
  }

  /**
   * Validates a decoded code point.
   *
   * @param {number} min - Smallest value the sequence length may encode;
   *   anything below it is an overlong encoding.
   * @param {number} n - The decoded code point.
   * @returns {number} `n`, unchanged, when it is valid.
   * @throws {Error} If `n` is overlong, a surrogate (U+D800..U+DFFF), or
   *   above U+10FFFF.
   */
  function code(min, n) {
    const isOverlong = n < min;
    const isSurrogate = 0xd800 <= n && n < 0xe000;
    // The upper bound is the last Unicode code point, not the end of the BMP;
    // otherwise every 4-byte sequence would be rejected.
    const isOutOfRange = n > 0x10ffff;
    if (isOverlong || isSurrogate || isOutOfRange) {
      throw new Error("invalid UTF-8 encoding");
    }
    return n;
  }

  /**
   * Decodes a sequence of UTF-8 bytes into a string.
   *
   * @param {ArrayLike<number>} bytes - The encoded bytes (e.g. an Array or
   *   Uint8Array).
   * @returns {string} The decoded text.
   * @throws {Error} If `bytes` is not well-formed UTF-8.
   */
  export function decode(bytes) {
    // fromCodePoint (not fromCharCode) so code points above U+FFFF become
    // proper surrogate pairs instead of being truncated to 16 bits.
    return _decode(bytes)
      .map((codePoint) => String.fromCodePoint(codePoint))
      .join("");
  }

  /**
   * Decodes UTF-8 bytes into an array of code points.
   *
   * Walks the input by index rather than recursing on a copied tail, so the
   * work is linear and large inputs cannot overflow the call stack.
   *
   * @param {ArrayLike<number>} bytes - The encoded bytes.
   * @returns {number[]} The decoded code points, in order.
   * @throws {Error} If a lead byte is invalid, a continuation byte is missing
   *   or malformed, or the resulting code point fails validation in `code`.
   */
  function _decode(bytes) {
    const codePoints = [];
    const { length } = bytes;
    let i = 0;

    while (i < length) {
      const b1 = bytes[i];

      if (b1 < 0x80) {
        // 1 byte: 0xxxxxxx
        codePoints.push(code(0x0, b1));
        i += 1;
      } else if (b1 < 0xc0) {
        // A continuation byte cannot start a sequence.
        throw new Error("invalid UTF-8 encoding");
      } else if (b1 < 0xe0) {
        // 2 bytes: 110xxxxx 10xxxxxx
        codePoints.push(code(0x80, ((b1 & 0x1f) << 6) + con(bytes[i + 1])));
        i += 2;
      } else if (b1 < 0xf0) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        codePoints.push(
          code(
            0x800,
            ((b1 & 0x0f) << 12) + (con(bytes[i + 1]) << 6) + con(bytes[i + 2])
          )
        );
        i += 3;
      } else if (b1 < 0xf8) {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        codePoints.push(
          code(
            0x10000,
            ((b1 & 0x07) << 18) +
              (con(bytes[i + 1]) << 12) +
              (con(bytes[i + 2]) << 6) +
              con(bytes[i + 3])
          )
        );
        i += 4;
      } else {
        // 0xf8 and above (or a non-numeric entry) is never a valid lead byte.
        throw new Error("invalid UTF-8 encoding");
      }
    }

    return codePoints;
  }