263 lines
6.7 KiB
JavaScript
263 lines
6.7 KiB
JavaScript
'use strict'
|
||
|
||
var alphanumeric = require('is-alphanumerical')
|
||
var alphabetical = require('is-alphabetical')
|
||
var decimal = require('is-decimal')
|
||
var regular = require('./regular.json')
|
||
var normal = require('./normalize.json')
|
||
|
||
module.exports = parse
|
||
|
||
var own = {}.hasOwnProperty
|
||
|
||
// Parse a BCP 47 language tag.
|
||
/* eslint-disable-next-line complexity */
|
||
function parse(tag, options) {
|
||
var settings = options || {}
|
||
var result = empty()
|
||
var source = String(tag)
|
||
var value = source.toLowerCase()
|
||
var index = 0
|
||
var start
|
||
var groups
|
||
var offset
|
||
|
||
// Check input.
|
||
if (tag == null) {
|
||
throw new Error('Expected string, got `' + tag + '`')
|
||
}
|
||
|
||
// Let’s start.
|
||
// First: the edge cases.
|
||
if (own.call(normal, value)) {
|
||
if ((settings.normalize == null || settings.normalize) && normal[value]) {
|
||
return parse(normal[value])
|
||
}
|
||
|
||
result[regular.indexOf(value) === -1 ? 'irregular' : 'regular'] = source
|
||
|
||
return result
|
||
}
|
||
|
||
// Now, to actually parse, eat what could be a language.
|
||
while (alphabetical(value.charCodeAt(index)) && index < 9) index++
|
||
|
||
// A language.
|
||
if (index > 1 /* Min 639. */ && index < 9 /* Max subtag. */) {
|
||
// 5 and up is a subtag.
|
||
// 4 is the size of reserved languages.
|
||
// 3 an ISO 639-2 or ISO 639-3.
|
||
// 2 is an ISO 639-1.
|
||
// <https://github.com/wooorm/iso-639-2>
|
||
// <https://github.com/wooorm/iso-639-3>
|
||
result.language = source.slice(0, index)
|
||
|
||
if (index < 4 /* Max 639. */) {
|
||
groups = 0
|
||
|
||
while (
|
||
value.charCodeAt(index) === 45 /* `-` */ &&
|
||
alphabetical(value.charCodeAt(index + 1)) &&
|
||
alphabetical(value.charCodeAt(index + 2)) &&
|
||
alphabetical(value.charCodeAt(index + 3)) &&
|
||
!alphabetical(value.charCodeAt(index + 4))
|
||
) {
|
||
if (groups > 2 /* Max extended language subtag count. */) {
|
||
return fail(
|
||
index,
|
||
3,
|
||
'Too many extended language subtags, expected at most 3 subtags'
|
||
)
|
||
}
|
||
|
||
// Extended language subtag.
|
||
result.extendedLanguageSubtags.push(source.slice(index + 1, index + 4))
|
||
index += 4
|
||
groups++
|
||
}
|
||
}
|
||
|
||
// ISO 15924 script.
|
||
// <https://github.com/wooorm/iso-15924>
|
||
if (
|
||
value.charCodeAt(index) === 45 /* `-` */ &&
|
||
alphabetical(value.charCodeAt(index + 1)) &&
|
||
alphabetical(value.charCodeAt(index + 2)) &&
|
||
alphabetical(value.charCodeAt(index + 3)) &&
|
||
alphabetical(value.charCodeAt(index + 4)) &&
|
||
!alphabetical(value.charCodeAt(index + 5))
|
||
) {
|
||
result.script = source.slice(index + 1, index + 5)
|
||
index += 5
|
||
}
|
||
|
||
if (value.charCodeAt(index) === 45 /* `-` */) {
|
||
// ISO 3166-1 region.
|
||
// <https://github.com/wooorm/iso-3166>
|
||
if (
|
||
alphabetical(value.charCodeAt(index + 1)) &&
|
||
alphabetical(value.charCodeAt(index + 2)) &&
|
||
!alphabetical(value.charCodeAt(index + 3))
|
||
) {
|
||
result.region = source.slice(index + 1, index + 3)
|
||
index += 3
|
||
}
|
||
// UN M49 region.
|
||
// <https://github.com/wooorm/un-m49>
|
||
else if (
|
||
decimal(value.charCodeAt(index + 1)) &&
|
||
decimal(value.charCodeAt(index + 2)) &&
|
||
decimal(value.charCodeAt(index + 3)) &&
|
||
!decimal(value.charCodeAt(index + 4))
|
||
) {
|
||
result.region = source.slice(index + 1, index + 4)
|
||
index += 4
|
||
}
|
||
}
|
||
|
||
while (value.charCodeAt(index) === 45 /* `-` */) {
|
||
offset = start = index + 1
|
||
|
||
while (alphanumeric(value.charCodeAt(offset))) {
|
||
if (offset - start > 7 /* Max variant. */) {
|
||
return fail(
|
||
offset,
|
||
1,
|
||
'Too long variant, expected at most 8 characters'
|
||
)
|
||
}
|
||
|
||
offset++
|
||
}
|
||
|
||
if (
|
||
// Long variant.
|
||
offset - start > 4 /* Min alpha numeric variant. */ ||
|
||
// Short variant.
|
||
(offset - start > 3 /* Min variant. */ &&
|
||
decimal(value.charCodeAt(start)))
|
||
) {
|
||
result.variants.push(source.slice(start, offset))
|
||
index = offset
|
||
}
|
||
// Something else.
|
||
else {
|
||
break
|
||
}
|
||
}
|
||
|
||
// Extensions.
|
||
while (value.charCodeAt(index) === 45 /* `-` */) {
|
||
// Exit if this isn’t an extension.
|
||
if (
|
||
value.charCodeAt(index + 1) === 120 /* `x` */ ||
|
||
!alphanumeric(value.charCodeAt(index + 1)) ||
|
||
value.charCodeAt(index + 2) !== 45 /* `-` */ ||
|
||
!alphanumeric(value.charCodeAt(index + 3))
|
||
) {
|
||
break
|
||
}
|
||
|
||
offset = index + 2
|
||
groups = 0
|
||
|
||
while (
|
||
value.charCodeAt(offset) === 45 /* `-` */ &&
|
||
alphanumeric(value.charCodeAt(offset + 1)) &&
|
||
alphanumeric(value.charCodeAt(offset + 2))
|
||
) {
|
||
start = offset + 1
|
||
offset = start + 2
|
||
groups++
|
||
|
||
while (alphanumeric(value.charCodeAt(offset))) {
|
||
if (offset - start > 7 /* Max extension. */) {
|
||
return fail(
|
||
offset,
|
||
2,
|
||
'Too long extension, expected at most 8 characters'
|
||
)
|
||
}
|
||
|
||
offset++
|
||
}
|
||
}
|
||
|
||
if (!groups) {
|
||
return fail(
|
||
offset,
|
||
4,
|
||
'Empty extension, extensions must have at least 2 characters of content'
|
||
)
|
||
}
|
||
|
||
result.extensions.push({
|
||
singleton: source.charAt(index + 1),
|
||
extensions: source.slice(index + 3, offset).split('-')
|
||
})
|
||
|
||
index = offset
|
||
}
|
||
}
|
||
// Not a language.
|
||
else {
|
||
index = 0
|
||
}
|
||
|
||
// Private use.
|
||
if (
|
||
(index === 0 && value.charCodeAt(index) === 120) /* `x` */ ||
|
||
(value.charCodeAt(index) === 45 /* `-` */ &&
|
||
value.charCodeAt(index + 1) === 120) /* `x` */
|
||
) {
|
||
offset = index = index ? index + 2 : 1
|
||
|
||
while (
|
||
value.charCodeAt(offset) === 45 /* `-` */ &&
|
||
alphanumeric(value.charCodeAt(offset + 1))
|
||
) {
|
||
offset = start = index + 1
|
||
|
||
while (alphanumeric(value.charCodeAt(offset))) {
|
||
if (offset - start > 7 /* Max private use. */) {
|
||
return fail(
|
||
offset,
|
||
5,
|
||
'Too long private-use area, expected at most 8 characters'
|
||
)
|
||
}
|
||
|
||
offset++
|
||
}
|
||
|
||
result.privateuse.push(source.slice(index + 1, offset))
|
||
index = offset
|
||
}
|
||
}
|
||
|
||
if (index !== source.length) {
|
||
return fail(index, 6, 'Found superfluous content after tag')
|
||
}
|
||
|
||
return result
|
||
|
||
function fail(offset, code, reason) {
|
||
if (settings.warning) settings.warning(reason, code, offset)
|
||
return settings.forgiving ? result : empty()
|
||
}
|
||
}
|
||
|
||
// Create an empty results object.
|
||
function empty() {
|
||
return {
|
||
language: null,
|
||
extendedLanguageSubtags: [],
|
||
script: null,
|
||
region: null,
|
||
variants: [],
|
||
extensions: [],
|
||
privateuse: [],
|
||
irregular: null,
|
||
regular: null
|
||
}
|
||
}
|