// Listing metadata from the code viewer: 126 lines, 4.7 KiB, JavaScript.
"use strict";
// Interop helper: hands back ES modules untouched and wraps anything else
// so that the value is reachable through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
var xml_json_1 = __importDefault(require("./maps/xml.json"));
var encode_trie_1 = require("./encode-trie");
var entities_json_1 = __importDefault(require("./maps/entities.json"));
// ASCII start characters of the HTML entity map, plus every code unit >= 0x80.
var htmlReplacer = getCharRegExp(entities_json_1.default, true);
// ASCII start characters of the XML entity map, plus every code unit >= 0x80.
var xmlReplacer = getCharRegExp(xml_json_1.default, true);
// Start characters of the XML entity map only; other characters never match.
var xmlInvalidChars = getCharRegExp(xml_json_1.default, false);
// Lookup from the char code of an XML entity's first character to its
// `&name;` reference.
var xmlCodeMap = new Map();
Object.keys(xml_json_1.default).forEach(function (name) {
    xmlCodeMap.set(xml_json_1.default[name].charCodeAt(0), "&" + name + ";");
});
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using XML entities.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
 *
 * @param str String to encode.
 * @returns The encoded string.
 */
function encodeXML(str) {
    var ret = "";
    var lastIdx = 0;
    var match;
    // `xmlReplacer` is a shared global (`g`) regex. If an earlier call threw
    // part-way through its loop, `lastIndex` is still non-zero and this call
    // would silently skip the beginning of `str`, so always start from 0.
    xmlReplacer.lastIndex = 0;
    while ((match = xmlReplacer.exec(str)) !== null) {
        var i = match.index;
        var char = str.charCodeAt(i);
        var next = xmlCodeMap.get(char);
        if (next) {
            // A named XML entity exists for this character.
            ret += str.substring(lastIdx, i) + next;
            lastIdx = i + 1;
        }
        else {
            // NOTE(review): `getCodePoint` is presumed to return the full code
            // point when a surrogate pair starts at `i` - confirm in encode-trie.
            ret += str.substring(lastIdx, i) + "&#x" + encode_trie_1.getCodePoint(str, i).toString(16) + ";";
            // High surrogates are 0xD800-0xDBFF and low surrogates are
            // 0xDC00-0xDFFF, so both are identified with the 0xFC00 mask.
            // Skip the low surrogate only when a complete pair is present, so
            // that it is not encoded a second time and so that a character
            // following a lone high surrogate is not dropped.
            var isSurrogatePair = (char & 0xfc00) === 0xd800 &&
                (str.charCodeAt(i + 1) & 0xfc00) === 0xdc00;
            if (isSurrogatePair) {
                xmlReplacer.lastIndex += 1;
            }
            lastIdx = xmlReplacer.lastIndex;
        }
    }
    return ret + str.substring(lastIdx);
}
exports.encodeXML = encodeXML;
/**
 * Encodes every entity and every non-ASCII character found in the input.
 *
 * Characters that are perfectly valid ASCII in an HTML document are encoded
 * too; `#`, for instance, becomes `&num;`. If a more compact result is
 * wanted, consider `encodeNonAsciiHTML` instead.
 *
 * A character without an equivalent entity is written as a numeric
 * hexadecimal reference (eg. `&#xfc;`).
 *
 * @param data String to encode.
 * @returns The result of `encodeHTMLTrieRe` driven by `htmlReplacer`.
 */
function encodeHTML(data) {
    var encoded = encode_trie_1.encodeHTMLTrieRe(htmlReplacer, data);
    return encoded;
}
exports.encodeHTML = encodeHTML;
/**
 * Encodes every non-ASCII character, and every character that is not valid
 * in an HTML document, using HTML entities.
 *
 * A character without an equivalent entity is written as a numeric
 * hexadecimal reference (eg. `&#xfc;`).
 *
 * Only the characters matched by `xmlReplacer` are considered, which is why
 * the output is more compact than that of `encodeHTML`.
 *
 * @param data String to encode.
 * @returns The result of `encodeHTMLTrieRe` driven by `xmlReplacer`.
 */
function encodeNonAsciiHTML(data) {
    var encoded = encode_trie_1.encodeHTMLTrieRe(xmlReplacer, data);
    return encoded;
}
exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
/**
 * Builds a global character-class regex from the first character of every
 * value in `map`.
 *
 * Each character is written into the class prefixed with a backslash.
 * NOTE(review): for letters and digits that backslash gives the character its
 * regex escape meaning rather than matching it literally - confirm the maps
 * passed in are fine with that before changing it.
 *
 * @param map Object whose values are the strings to take start characters from.
 * @param nonAscii When true, start characters with a code of 128 or above are
 *     left out individually and the range `\x80-\uFFFF` is appended instead.
 * @returns A new `RegExp` with the `g` flag.
 */
function getCharRegExp(map, nonAscii) {
    // Escaped start character of every entry.
    var firstChars = Object.keys(map).map(function (name) {
        return "\\" + map[name].charAt(0);
    });
    var kept = firstChars.filter(function (escaped) {
        return !nonAscii || escaped.charCodeAt(1) < 128;
    });
    kept.sort(function (left, right) {
        return left.charCodeAt(1) - right.charCodeAt(1);
    });
    // After sorting, duplicates sit next to each other; keep the last of each.
    var unique = kept.filter(function (escaped, idx, all) {
        return escaped !== all[idx + 1];
    });
    // Walk the sorted characters and collapse every run of at least three
    // consecutive char codes into a single `first-last` range.
    var pieces = [];
    var pos = 0;
    while (pos < unique.length) {
        var runEnd = pos;
        while (runEnd + 1 < unique.length &&
            unique[runEnd].charCodeAt(1) + 1 === unique[runEnd + 1].charCodeAt(1)) {
            runEnd += 1;
        }
        if (runEnd - pos >= 2) {
            pieces.push(unique[pos] + "-" + unique[runEnd]);
            pos = runEnd + 1;
        }
        else {
            pieces.push(unique[pos]);
            pos += 1;
        }
    }
    var tail = nonAscii ? "\\x80-\\uFFFF" : "";
    return new RegExp("[" + pieces.join("") + tail + "]", "g");
}
/**
 * Alias of `encodeXML`.
 *
 * Encodes every non-ASCII character, and every character that is not valid
 * in an XML document. A named XML entity is used when one exists; any other
 * character becomes a numeric hexadecimal reference (eg. `&#xfc;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the
 * expense of reduced transportability.
 *
 * @param data String to escape.
 */
exports.escape = encodeXML;
/**
 * Encodes all characters not valid in XML documents using XML entities.
 *
 * Non-ASCII characters are left untouched, so the output is character-set
 * dependent.
 *
 * @param data String to escape.
 * @returns The escaped string.
 */
function escapeUTF8(data) {
    var match;
    var lastIdx = 0;
    var result = "";
    // `xmlInvalidChars` is a shared global (`g`) regex. If an earlier call
    // threw part-way through its loop, `lastIndex` is still non-zero and this
    // call would leave the beginning of `data` unescaped, so start from 0.
    xmlInvalidChars.lastIndex = 0;
    while ((match = xmlInvalidChars.exec(data))) {
        if (lastIdx !== match.index) {
            result += data.substring(lastIdx, match.index);
        }
        // The matched character is expected to be a key of `xmlCodeMap`:
        // the regex and the map are both built from the same XML entity map.
        result += xmlCodeMap.get(match[0].charCodeAt(0));
        // The regex is a single character class, so every match has length 1.
        lastIdx = match.index + 1;
    }
    return result + data.substring(lastIdx);
}
exports.escapeUTF8 = escapeUTF8;