123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- 'use strict';
- // based on https://github.com/bestiejs/punycode.js/blob/master/punycode.js
- var global = require('../internals/global');
- var uncurryThis = require('../internals/function-uncurry-this');
// Largest delta the encoder will work with before reporting an overflow.
var maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

// Bootstring parameter values for Punycode (RFC 3492, section 5).
var base = 36;
var tMin = 1;
var tMax = 26;
var skew = 38;
var damp = 700;
var initialBias = 72;
var initialN = 128; // 0x80
var delimiter = '-'; // '\x2D'

// Matches any character outside the ASCII range U+0000..U+007F.
// U+007F (DEL) is ASCII, so it is part of the excluded range: a label that
// consists only of ASCII characters must never be Punycode-encoded.
var regexNonASCII = /[^\0-\u007F]/; // non-ASCII chars
var regexSeparators = /[.\u3002\uFF0E\uFF61]/g; // RFC 3490 separators
var OVERFLOW_ERROR = 'Overflow: input needs wider integers to process';
var baseMinusTMin = base - tMin;

// Built-ins are captured once and called in uncurried form
// (`method(receiver, ...args)`) by the rest of the file.
var RangeError = global.RangeError;
// `exec` is read off an existing regex instance; it is invoked with the
// regex to run as its first argument.
var exec = uncurryThis(regexSeparators.exec);
var floor = Math.floor;
var fromCharCode = String.fromCharCode;
var charCodeAt = uncurryThis(''.charCodeAt);
var join = uncurryThis([].join);
var push = uncurryThis([].push);
var replace = uncurryThis(''.replace);
var split = uncurryThis(''.split);
var toLowerCase = uncurryThis(''.toLowerCase);
/**
 * Turns a JavaScript string into an array of numeric Unicode code points.
 * A high surrogate that is directly followed by a low surrogate is merged
 * into a single supplementary code point; every other code unit, unpaired
 * surrogates included, is copied through unchanged.
 */
var ucs2decode = function (string) {
  var codePoints = [];
  var total = string.length;
  var index = 0;
  var unit, next, mayStartPair;
  while (index < total) {
    unit = charCodeAt(string, index);
    index += 1;
    // Only a high surrogate that still has a successor can begin a pair.
    mayStartPair = unit >= 0xD800 && unit <= 0xDBFF && index < total;
    if (!mayStartPair) {
      push(codePoints, unit);
      continue;
    }
    next = charCodeAt(string, index);
    if ((next & 0xFC00) === 0xDC00) {
      // Valid pair: combine both halves and consume the low surrogate too.
      push(codePoints, ((unit & 0x3FF) << 10) + (next & 0x3FF) + 0x10000);
      index += 1;
    } else {
      // Unmatched high surrogate: emit it alone and leave `next` for the
      // following iteration, because it may itself begin a surrogate pair.
      push(codePoints, unit);
    }
  }
  return codePoints;
};
/**
 * Converts a digit/integer into the code point of the basic (ASCII)
 * character that represents it: 0..25 map to lowercase `a`..`z`,
 * 26..35 map to `0`..`9`.
 */
var digitToBasic = function (digit) {
  // 0 + 22 + 75 === 97 ('a'); 26 + 22 === 48 ('0').
  var letterShift = digit < 26 ? 75 : 0;
  return digit + 22 + letterShift;
};
/**
 * Computes the bias to use for the next delta, following the bias
 * adaptation function in section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 */
var adapt = function (delta, numPoints, firstTime) {
  // Scale the delta down: by `damp` on the first call, by two afterwards.
  var scaled = firstTime ? floor(delta / damp) : delta >> 1;
  scaled += floor(scaled / numPoints);
  // Loop bound is constant, so it is computed once up front.
  var threshold = baseMinusTMin * tMax >> 1;
  var k;
  for (k = 0; scaled > threshold; k += base) {
    scaled = floor(scaled / baseMinusTMin);
  }
  return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
};
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 *
 * The output consists of the input's basic (below 0x80) code points in
 * their original order, followed by the delimiter when at least one basic
 * code point was present, followed by the remaining code points encoded as
 * generalized variable-length integers.
 *
 * @param {string} input The string of Unicode symbols to encode.
 * @returns {string} The resulting Punycode string (without any `xn--` prefix).
 * @throws {RangeError} When the running delta would exceed `maxInt`.
 */
var encode = function (input) {
  var output = [];
  // Convert the input in UCS-2 to an array of Unicode code points. A separate
  // local is used so the string parameter is not overwritten with an array.
  var codePoints = ucs2decode(input);
  // Cache the length; every loop below walks this same array.
  var inputLength = codePoints.length;
  // Initialize the state.
  var n = initialN;
  var delta = 0;
  var bias = initialBias;
  var i, currentValue;

  // Handle the basic code points.
  for (i = 0; i < inputLength; i++) {
    currentValue = codePoints[i];
    if (currentValue < 0x80) {
      push(output, fromCharCode(currentValue));
    }
  }

  var basicLength = output.length; // number of basic code points.
  var handledCPCount = basicLength; // number of code points that have been handled;

  // Finish the basic string with a delimiter unless it's empty.
  if (basicLength) {
    push(output, delimiter);
  }

  // Main encoding loop:
  while (handledCPCount < inputLength) {
    // All non-basic code points < n have been handled already. Find the next larger one:
    var m = maxInt;
    for (i = 0; i < inputLength; i++) {
      currentValue = codePoints[i];
      if (currentValue >= n && currentValue < m) {
        m = currentValue;
      }
    }

    // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, but guard against overflow.
    var handledCPCountPlusOne = handledCPCount + 1;
    if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
      throw new RangeError(OVERFLOW_ERROR);
    }

    delta += (m - n) * handledCPCountPlusOne;
    n = m;

    for (i = 0; i < inputLength; i++) {
      currentValue = codePoints[i];
      // Every code point below `n` is a position the decoder has to step over.
      if (currentValue < n && ++delta > maxInt) {
        throw new RangeError(OVERFLOW_ERROR);
      }
      if (currentValue === n) {
        // Represent delta as a generalized variable-length integer.
        var q = delta;
        var k = base;
        while (true) {
          // Threshold for this digit position, clamped to [tMin, tMax].
          var t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
          if (q < t) break;
          var qMinusT = q - t;
          var baseMinusT = base - t;
          push(output, fromCharCode(digitToBasic(t + qMinusT % baseMinusT)));
          q = floor(qMinusT / baseMinusT);
          k += base;
        }

        // The final digit is the one that fell below its threshold.
        push(output, fromCharCode(digitToBasic(q)));
        // `firstTime` is true only while no non-basic code point has been emitted yet.
        bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
        delta = 0;
        handledCPCount++;
      }
    }

    delta++;
    n++;
  }
  return join(output, '');
};
- module.exports = function (input) {
- var encoded = [];
- var labels = split(replace(toLowerCase(input), regexSeparators, '\u002E'), '.');
- var i, label;
- for (i = 0; i < labels.length; i++) {
- label = labels[i];
- push(encoded, exec(regexNonASCII, label) ? 'xn--' + encode(label) : label);
- }
- return join(encoded, '.');
- };
|