epiphany/node_modules/bcp-47/lib/parse.js

'use strict'

var alphanumeric = require('is-alphanumerical')
var alphabetical = require('is-alphabetical')
var decimal = require('is-decimal')
var regular = require('./regular.json')
var normal = require('./normalize.json')

// The parser is this module's only export.
module.exports = parse

// Reference to `Object.prototype.hasOwnProperty`, used with `.call` so that
// key lookups in the JSON maps only match own keys, never inherited ones.
var own = {}.hasOwnProperty

/**
 * Parse a BCP 47 language tag.
 *
 * Matching is done on a lowercased copy of the input, but every value stored
 * in the result is sliced from the original string, so casing is preserved.
 *
 * @param {string} tag
 *   Tag to parse; coerced with `String`.
 * @param {Object} [options]
 *   Configuration.
 * @param {boolean} [options.normalize=true]
 *   When not explicitly falsey, tags listed in `normalize.json` that have a
 *   replacement are parsed as that replacement instead.
 * @param {boolean} [options.forgiving=false]
 *   When truthy, return what was parsed so far on a violation instead of an
 *   empty result.
 * @param {Function} [options.warning]
 *   Called as `warning(reason, code, offset)` on a violation.
 * @returns {Object}
 *   Object with `language`, `extendedLanguageSubtags`, `script`, `region`,
 *   `variants`, `extensions`, `privateuse`, `irregular`, and `regular` fields.
 * @throws {Error}
 *   When `tag` is `null` or `undefined`.
 */
/* eslint-disable-next-line complexity */
function parse(tag, options) {
  var settings = options || {}
  var result = empty()
  var source = String(tag)
  var value = source.toLowerCase()
  var index = 0
  var start
  var groups
  var offset
  var privateuseStart

  // Check input.
  if (tag == null) {
    throw new Error('Expected string, got `' + tag + '`')
  }

  // Let’s start.
  // First: the edge cases (tags listed in `normalize.json`).
  if (own.call(normal, value)) {
    if ((settings.normalize == null || settings.normalize) && normal[value]) {
      return parse(normal[value])
    }

    // Not normalized: store the whole tag verbatim in `regular` or
    // `irregular`, depending on whether it is listed in `regular.json`.
    result[regular.indexOf(value) === -1 ? 'irregular' : 'regular'] = source

    return result
  }

  // Now, to actually parse, eat what could be a language.
  while (alphabetical(value.charCodeAt(index)) && index < 9) index++

  // A language.
  if (index > 1 /* Min 639. */ && index < 9 /* Max subtag. */) {
    // 5 and up is a subtag.
    // 4 is the size of reserved languages.
    // 3 an ISO 639-2 or ISO 639-3.
    // 2 is an ISO 639-1.
    // <https://github.com/wooorm/iso-639-2>
    // <https://github.com/wooorm/iso-639-3>
    result.language = source.slice(0, index)

    // Note on the lookaheads below: each fixed-width subtag (extlang, script,
    // region) must be followed by something that is *not alphanumeric* (a
    // dash, the end, or junk).
    // Checking only for “not alphabetical” / “not decimal” would cut valid
    // variants such as `ao1990`, `abl1943`, `luna1918`, or `123a` in half and
    // then reject the rest as superfluous content.

    if (index < 4 /* Max 639. */) {
      groups = 0

      while (
        value.charCodeAt(index) === 45 /* `-` */ &&
        alphabetical(value.charCodeAt(index + 1)) &&
        alphabetical(value.charCodeAt(index + 2)) &&
        alphabetical(value.charCodeAt(index + 3)) &&
        !alphanumeric(value.charCodeAt(index + 4))
      ) {
        if (groups > 2 /* Max extended language subtag count. */) {
          return fail(
            index,
            3,
            'Too many extended language subtags, expected at most 3 subtags'
          )
        }

        // Extended language subtag.
        result.extendedLanguageSubtags.push(source.slice(index + 1, index + 4))
        index += 4
        groups++
      }
    }

    // ISO 15924 script.
    // <https://github.com/wooorm/iso-15924>
    if (
      value.charCodeAt(index) === 45 /* `-` */ &&
      alphabetical(value.charCodeAt(index + 1)) &&
      alphabetical(value.charCodeAt(index + 2)) &&
      alphabetical(value.charCodeAt(index + 3)) &&
      alphabetical(value.charCodeAt(index + 4)) &&
      !alphanumeric(value.charCodeAt(index + 5))
    ) {
      result.script = source.slice(index + 1, index + 5)
      index += 5
    }

    if (value.charCodeAt(index) === 45 /* `-` */) {
      // ISO 3166-1 region.
      // <https://github.com/wooorm/iso-3166>
      if (
        alphabetical(value.charCodeAt(index + 1)) &&
        alphabetical(value.charCodeAt(index + 2)) &&
        !alphanumeric(value.charCodeAt(index + 3))
      ) {
        result.region = source.slice(index + 1, index + 3)
        index += 3
      }
      // UN M49 region.
      // <https://github.com/wooorm/un-m49>
      else if (
        decimal(value.charCodeAt(index + 1)) &&
        decimal(value.charCodeAt(index + 2)) &&
        decimal(value.charCodeAt(index + 3)) &&
        !alphanumeric(value.charCodeAt(index + 4))
      ) {
        result.region = source.slice(index + 1, index + 4)
        index += 4
      }
    }

    // Variants.
    while (value.charCodeAt(index) === 45 /* `-` */) {
      offset = start = index + 1

      while (alphanumeric(value.charCodeAt(offset))) {
        if (offset - start > 7 /* Max variant. */) {
          return fail(
            offset,
            1,
            'Too long variant, expected at most 8 characters'
          )
        }

        offset++
      }

      if (
        // Long variant.
        offset - start > 4 /* Min alpha numeric variant. */ ||
        // Short variant.
        (offset - start > 3 /* Min variant. */ &&
          decimal(value.charCodeAt(start)))
      ) {
        result.variants.push(source.slice(start, offset))
        index = offset
      }
      // Something else.
      else {
        break
      }
    }

    // Extensions.
    while (value.charCodeAt(index) === 45 /* `-` */) {
      // Exit if this isn’t an extension.
      if (
        value.charCodeAt(index + 1) === 120 /* `x` */ ||
        !alphanumeric(value.charCodeAt(index + 1)) ||
        value.charCodeAt(index + 2) !== 45 /* `-` */ ||
        !alphanumeric(value.charCodeAt(index + 3))
      ) {
        break
      }

      // `offset` points at the dash after the singleton.
      offset = index + 2
      groups = 0

      // Eat every following subtag of at least two characters.
      while (
        value.charCodeAt(offset) === 45 /* `-` */ &&
        alphanumeric(value.charCodeAt(offset + 1)) &&
        alphanumeric(value.charCodeAt(offset + 2))
      ) {
        start = offset + 1
        offset = start + 2
        groups++

        while (alphanumeric(value.charCodeAt(offset))) {
          if (offset - start > 7 /* Max extension. */) {
            return fail(
              offset,
              2,
              'Too long extension, expected at most 8 characters'
            )
          }

          offset++
        }
      }

      if (!groups) {
        return fail(
          offset,
          4,
          'Empty extension, extensions must have at least 2 characters of content'
        )
      }

      result.extensions.push({
        singleton: source.charAt(index + 1),
        extensions: source.slice(index + 3, offset).split('-')
      })

      index = offset
    }
  }
  // Not a language.
  else {
    index = 0
  }

  // Private use.
  if (
    (index === 0 && value.charCodeAt(index) === 120) /* `x` */ ||
    (value.charCodeAt(index) === 45 /* `-` */ &&
      value.charCodeAt(index + 1) === 120) /* `x` */
  ) {
    // Remember where the private-use sequence started so it can be rejected
    // if no subtag follows the singleton.
    privateuseStart = index
    offset = index = index ? index + 2 : 1

    while (
      value.charCodeAt(offset) === 45 /* `-` */ &&
      alphanumeric(value.charCodeAt(offset + 1))
    ) {
      offset = start = index + 1

      while (alphanumeric(value.charCodeAt(offset))) {
        if (offset - start > 7 /* Max private use. */) {
          return fail(
            offset,
            5,
            'Too long private-use area, expected at most 8 characters'
          )
        }

        offset++
      }

      result.privateuse.push(source.slice(index + 1, offset))
      index = offset
    }

    // The `x` singleton must be followed by at least one subtag.
    // If none was found (`x`, `en-x`), rewind so the dangling singleton is
    // reported as superfluous content below instead of being accepted.
    if (result.privateuse.length === 0) {
      index = privateuseStart
    }
  }

  if (index !== source.length) {
    return fail(index, 6, 'Found superfluous content after tag')
  }

  return result

  // Report a violation through `settings.warning` (if given), and produce the
  // value `parse` should return: the partial result when forgiving, otherwise
  // a fresh empty result.
  function fail(offset, code, reason) {
    if (settings.warning) settings.warning(reason, code, offset)
    return settings.forgiving ? result : empty()
  }
}

// Build a fresh, blank result: scalar fields are `null`, list fields are new
// empty arrays (never shared between calls).
function empty() {
  var schema = {}

  schema.language = null
  schema.extendedLanguageSubtags = []
  schema.script = null
  schema.region = null
  schema.variants = []
  schema.extensions = []
  schema.privateuse = []
  schema.irregular = null
  schema.regular = null

  return schema
}