more
This commit is contained in:
dopeuni444
2025-07-31 12:23:33 +04:00
parent 20b46678b7
commit b5a22951ae
3401 changed files with 331100 additions and 0 deletions

18
unified-ai-platform/node_modules/graphemer/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,18 @@
Copyright 2020 Filament (Anomalous Technologies Limited)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,958 @@
"use strict";
// CommonJS/ES-module interop helper: reuse an `__importDefault` already present
// on `this` when there is one, otherwise fall back to the local implementation.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // A truthy module flagged with `__esModule` is handed back untouched.
    var flaggedAsEsModule = mod && mod.__esModule;
    if (flaggedAsEsModule) {
        return mod;
    }
    // Anything else is wrapped so callers can always read `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const boundaries_1 = require("./boundaries");
const GraphemerHelper_1 = __importDefault(require("./GraphemerHelper"));
const GraphemerIterator_1 = __importDefault(require("./GraphemerIterator"));
class Graphemer {
/**
* Returns the next grapheme break in the string after the given index
* @param string {string}
* @param index {number}
* @returns {number}
*/
static nextBreak(string, index) {
if (index === undefined) {
index = 0;
}
if (index < 0) {
return 0;
}
if (index >= string.length - 1) {
return string.length;
}
const prevCP = GraphemerHelper_1.default.codePointAt(string, index);
const prev = Graphemer.getGraphemeBreakProperty(prevCP);
const prevEmoji = Graphemer.getEmojiProperty(prevCP);
const mid = [];
const midEmoji = [];
for (let i = index + 1; i < string.length; i++) {
// check for already processed low surrogates
if (GraphemerHelper_1.default.isSurrogate(string, i - 1)) {
continue;
}
const nextCP = GraphemerHelper_1.default.codePointAt(string, i);
const next = Graphemer.getGraphemeBreakProperty(nextCP);
const nextEmoji = Graphemer.getEmojiProperty(nextCP);
if (GraphemerHelper_1.default.shouldBreak(prev, mid, next, prevEmoji, midEmoji, nextEmoji)) {
return i;
}
mid.push(next);
midEmoji.push(nextEmoji);
}
return string.length;
}
/**
* Breaks the given string into an array of grapheme clusters
* @param str {string}
* @returns {string[]}
*/
splitGraphemes(str) {
const res = [];
let index = 0;
let brk;
while ((brk = Graphemer.nextBreak(str, index)) < str.length) {
res.push(str.slice(index, brk));
index = brk;
}
if (index < str.length) {
res.push(str.slice(index));
}
return res;
}
/**
* Returns an iterator of grapheme clusters in the given string
* @param str {string}
* @returns {GraphemerIterator}
*/
iterateGraphemes(str) {
return new GraphemerIterator_1.default(str, Graphemer.nextBreak);
}
/**
* Returns the number of grapheme clusters in the given string
* @param str {string}
* @returns {number}
*/
countGraphemes(str) {
let count = 0;
let index = 0;
let brk;
while ((brk = Graphemer.nextBreak(str, index)) < str.length) {
index = brk;
count++;
}
if (index < str.length) {
count++;
}
return count;
}
/**
* Given a Unicode code point, determines this symbol's grapheme break property
* @param code {number} Unicode code point
* @returns {number}
*/
static getGraphemeBreakProperty(code) {
// Grapheme break property taken from:
// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
// and generated by
// node ./scripts/generate-grapheme-break.js
if (code < 0xbf09) {
if (code < 0xac54) {
if (code < 0x102d) {
if (code < 0xb02) {
if (code < 0x93b) {
if (code < 0x6df) {
if (code < 0x5bf) {
if (code < 0x7f) {
if (code < 0xb) {
if (code < 0xa) {
// Cc [10] <control-0000>..<control-0009>
if (0x0 <= code && code <= 0x9) {
return boundaries_1.CLUSTER_BREAK.CONTROL;
}
}
else {
// Cc <control-000A>
if (0xa === code) {
return boundaries_1.CLUSTER_BREAK.LF;
}
}
}
else {
if (code < 0xd) {
// Cc [2] <control-000B>..<control-000C>
if (0xb <= code && code <= 0xc) {
return boundaries_1.CLUSTER_BREAK.CONTROL;
}
}
else {
if (code < 0xe) {
// Cc <control-000D>
if (0xd === code) {
return boundaries_1.CLUSTER_BREAK.CR;
}
}
else {
// Cc [18] <control-000E>..<control-001F>
if (0xe <= code && code <= 0x1f) {
return boundaries_1.CLUSTER_BREAK.CONTROL;
}
}
}
}
}
else {
if (code < 0x300) {
if (code < 0xad) {
// Cc [33] <control-007F>..<control-009F>
if (0x7f <= code && code <= 0x9f) {
return boundaries_1.CLUSTER_BREAK.CONTROL;
}
}
else {
// Cf SOFT HYPHEN
if (0xad === code) {
return boundaries_1.CLUSTER_BREAK.CONTROL;
}
}
}
else {
if (code < 0x483) {
// Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
if (0x300 <= code && code <= 0x36f) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x591) {
// Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
// Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
if (0x483 <= code && code <= 0x489) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
if (0x591 <= code && code <= 0x5bd) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
else {
if (code < 0x610) {
if (code < 0x5c4) {
if (code < 0x5c1) {
// Mn HEBREW POINT RAFE
if (0x5bf === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
if (0x5c1 <= code && code <= 0x5c2) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
else {
if (code < 0x5c7) {
// Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
if (0x5c4 <= code && code <= 0x5c5) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x600) {
// Mn HEBREW POINT QAMATS QATAN
if (0x5c7 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
if (0x600 <= code && code <= 0x605) {
return boundaries_1.CLUSTER_BREAK.PREPEND;
}
}
}
}
}
else {
if (code < 0x670) {
if (code < 0x61c) {
// Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
if (0x610 <= code && code <= 0x61a) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x64b) {
// Cf ARABIC LETTER MARK
if (0x61c === code) {
return boundaries_1.CLUSTER_BREAK.CONTROL;
}
}
else {
// Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
if (0x64b <= code && code <= 0x65f) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
else {
if (code < 0x6d6) {
// Mn ARABIC LETTER SUPERSCRIPT ALEF
if (0x670 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x6dd) {
// Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
if (0x6d6 <= code && code <= 0x6dc) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Cf ARABIC END OF AYAH
if (0x6dd === code) {
return boundaries_1.CLUSTER_BREAK.PREPEND;
}
}
}
}
}
}
}
else {
if (code < 0x81b) {
if (code < 0x730) {
if (code < 0x6ea) {
if (code < 0x6e7) {
// Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
if (0x6df <= code && code <= 0x6e4) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
if (0x6e7 <= code && code <= 0x6e8) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
else {
if (code < 0x70f) {
// Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
if (0x6ea <= code && code <= 0x6ed) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Cf SYRIAC ABBREVIATION MARK
if (0x70f === code) {
return boundaries_1.CLUSTER_BREAK.PREPEND;
}
// Mn SYRIAC LETTER SUPERSCRIPT ALAPH
if (0x711 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
else {
if (code < 0x7eb) {
if (code < 0x7a6) {
// Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
if (0x730 <= code && code <= 0x74a) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [11] THAANA ABAFILI..THAANA SUKUN
if (0x7a6 <= code && code <= 0x7b0) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
else {
if (code < 0x7fd) {
// Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
if (0x7eb <= code && code <= 0x7f3) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x816) {
// Mn NKO DANTAYALAN
if (0x7fd === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
if (0x816 <= code && code <= 0x819) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
else {
if (code < 0x898) {
if (code < 0x829) {
if (code < 0x825) {
// Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
if (0x81b <= code && code <= 0x823) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
if (0x825 <= code && code <= 0x827) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
else {
if (code < 0x859) {
// Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
if (0x829 <= code && code <= 0x82d) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x890) {
// Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
if (0x859 <= code && code <= 0x85b) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
if (0x890 <= code && code <= 0x891) {
return boundaries_1.CLUSTER_BREAK.PREPEND;
}
}
}
}
}
else {
if (code < 0x8e3) {
if (code < 0x8ca) {
// Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
if (0x898 <= code && code <= 0x89f) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x8e2) {
// Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
if (0x8ca <= code && code <= 0x8e1) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Cf ARABIC DISPUTED END OF AYAH
if (0x8e2 === code) {
return boundaries_1.CLUSTER_BREAK.PREPEND;
}
}
}
}
else {
if (code < 0x903) {
// Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
if (0x8e3 <= code && code <= 0x902) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mc DEVANAGARI SIGN VISARGA
if (0x903 === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
// Mn DEVANAGARI VOWEL SIGN OE
if (0x93a === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
}
else {
if (code < 0xa01) {
if (code < 0x982) {
if (code < 0x94d) {
if (code < 0x93e) {
// Mc DEVANAGARI VOWEL SIGN OOE
if (0x93b === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
// Mn DEVANAGARI SIGN NUKTA
if (0x93c === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x941) {
// Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
if (0x93e <= code && code <= 0x940) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
if (code < 0x949) {
// Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
if (0x941 <= code && code <= 0x948) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
if (0x949 <= code && code <= 0x94c) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
}
}
}
else {
if (code < 0x951) {
if (code < 0x94e) {
// Mn DEVANAGARI SIGN VIRAMA
if (0x94d === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
if (0x94e <= code && code <= 0x94f) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
}
else {
if (code < 0x962) {
// Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
if (0x951 <= code && code <= 0x957) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x981) {
// Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
if (0x962 <= code && code <= 0x963) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn BENGALI SIGN CANDRABINDU
if (0x981 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
else {
if (code < 0x9c7) {
if (code < 0x9be) {
if (code < 0x9bc) {
// Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
if (0x982 <= code && code <= 0x983) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn BENGALI SIGN NUKTA
if (0x9bc === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
else {
if (code < 0x9bf) {
// Mc BENGALI VOWEL SIGN AA
if (0x9be === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x9c1) {
// Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
if (0x9bf <= code && code <= 0x9c0) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
if (0x9c1 <= code && code <= 0x9c4) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
else {
if (code < 0x9d7) {
if (code < 0x9cb) {
// Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
if (0x9c7 <= code && code <= 0x9c8) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
if (code < 0x9cd) {
// Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
if (0x9cb <= code && code <= 0x9cc) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn BENGALI SIGN VIRAMA
if (0x9cd === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
else {
if (code < 0x9e2) {
// Mc BENGALI AU LENGTH MARK
if (0x9d7 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0x9fe) {
// Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
if (0x9e2 <= code && code <= 0x9e3) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn BENGALI SANDHI MARK
if (0x9fe === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
}
else {
if (code < 0xa83) {
if (code < 0xa47) {
if (code < 0xa3c) {
if (code < 0xa03) {
// Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
if (0xa01 <= code && code <= 0xa02) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mc GURMUKHI SIGN VISARGA
if (0xa03 === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
}
else {
if (code < 0xa3e) {
// Mn GURMUKHI SIGN NUKTA
if (0xa3c === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xa41) {
// Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
if (0xa3e <= code && code <= 0xa40) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
if (0xa41 <= code && code <= 0xa42) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
else {
if (code < 0xa70) {
if (code < 0xa4b) {
// Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
if (0xa47 <= code && code <= 0xa48) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xa51) {
// Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
if (0xa4b <= code && code <= 0xa4d) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn GURMUKHI SIGN UDAAT
if (0xa51 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
else {
if (code < 0xa75) {
// Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
if (0xa70 <= code && code <= 0xa71) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xa81) {
// Mn GURMUKHI SIGN YAKASH
if (0xa75 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
if (0xa81 <= code && code <= 0xa82) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
else {
if (code < 0xac9) {
if (code < 0xabe) {
// Mc GUJARATI SIGN VISARGA
if (0xa83 === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
// Mn GUJARATI SIGN NUKTA
if (0xabc === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xac1) {
// Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
if (0xabe <= code && code <= 0xac0) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
if (code < 0xac7) {
// Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
if (0xac1 <= code && code <= 0xac5) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
if (0xac7 <= code && code <= 0xac8) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
else {
if (code < 0xae2) {
if (code < 0xacb) {
// Mc GUJARATI VOWEL SIGN CANDRA O
if (0xac9 === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
if (code < 0xacd) {
// Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
if (0xacb <= code && code <= 0xacc) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn GUJARATI SIGN VIRAMA
if (0xacd === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
else {
if (code < 0xafa) {
// Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
if (0xae2 <= code && code <= 0xae3) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xb01) {
// Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
if (0xafa <= code && code <= 0xaff) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn ORIYA SIGN CANDRABINDU
if (0xb01 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
}
}
}
else {
if (code < 0xcf3) {
if (code < 0xc04) {
if (code < 0xb82) {
if (code < 0xb47) {
if (code < 0xb3e) {
if (code < 0xb3c) {
// Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
if (0xb02 <= code && code <= 0xb03) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn ORIYA SIGN NUKTA
if (0xb3c === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
else {
if (code < 0xb40) {
// Mc ORIYA VOWEL SIGN AA
// Mn ORIYA VOWEL SIGN I
if (0xb3e <= code && code <= 0xb3f) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xb41) {
// Mc ORIYA VOWEL SIGN II
if (0xb40 === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
if (0xb41 <= code && code <= 0xb44) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
else {
if (code < 0xb4d) {
if (code < 0xb4b) {
// Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
if (0xb47 <= code && code <= 0xb48) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
if (0xb4b <= code && code <= 0xb4c) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
}
else {
if (code < 0xb55) {
// Mn ORIYA SIGN VIRAMA
if (0xb4d === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xb62) {
// Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
// Mc ORIYA AU LENGTH MARK
if (0xb55 <= code && code <= 0xb57) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
if (0xb62 <= code && code <= 0xb63) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
}
}
else {
if (code < 0xbc6) {
if (code < 0xbbf) {
// Mn TAMIL SIGN ANUSVARA
if (0xb82 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
// Mc TAMIL VOWEL SIGN AA
if (0xbbe === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xbc0) {
// Mc TAMIL VOWEL SIGN I
if (0xbbf === code) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
if (code < 0xbc1) {
// Mn TAMIL VOWEL SIGN II
if (0xbc0 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
if (0xbc1 <= code && code <= 0xbc2) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
}
}
}
else {
if (code < 0xbd7) {
if (code < 0xbca) {
// Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
if (0xbc6 <= code && code <= 0xbc8) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
if (code < 0xbcd) {
// Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
if (0xbca <= code && code <= 0xbcc) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
else {
// Mn TAMIL SIGN VIRAMA
if (0xbcd === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
}
}
else {
if (code < 0xc00) {
// Mc TAMIL AU LENGTH MARK
if (0xbd7 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xc01) {
// Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
if (0xc00 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
// Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
if (0xc01 <= code && code <= 0xc03) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
}
}
}
}
}
}
}
else {
if (code < 0xcbe) {
if (code < 0xc4a) {
if (code < 0xc3e) {
// Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
if (0xc04 === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
// Mn TELUGU SIGN NUKTA
if (0xc3c === code) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xc41) {
// Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
if (0xc3e <= code && code <= 0xc40) {
return boundaries_1.CLUSTER_BREAK.EXTEND;
}
}
else {
if (code < 0xc46) {
// Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
if (0xc41 <= code && code <= 0xc44) {
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;

View File

@@ -0,0 +1,38 @@
"use strict";
/**
* The Grapheme_Cluster_Break property value
* @see https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.EXTENDED_PICTOGRAPHIC = exports.CLUSTER_BREAK = void 0;
var CLUSTER_BREAK;
(function (CLUSTER_BREAK) {
    // Member names in ordinal order: the array index is the member's numeric value.
    var memberNames = [
        "CR",
        "LF",
        "CONTROL",
        "EXTEND",
        "REGIONAL_INDICATOR",
        "SPACINGMARK",
        "L",
        "V",
        "T",
        "LV",
        "LVT",
        "OTHER",
        "PREPEND",
        "E_BASE",
        "E_MODIFIER",
        "ZWJ",
        "GLUE_AFTER_ZWJ",
        "E_BASE_GAZ"
    ];
    memberNames.forEach(function (name, value) {
        // Forward mapping (name -> number) first, then the reverse (number -> name).
        CLUSTER_BREAK[name] = value;
        CLUSTER_BREAK[value] = name;
    });
})(CLUSTER_BREAK = exports.CLUSTER_BREAK || (exports.CLUSTER_BREAK = {}));
/**
* The Emoji character property is an extension of UCD but shares the same namespace and structure
* @see http://www.unicode.org/reports/tr51/tr51-14.html#Emoji_Properties_and_Data_Files
*
* Here we model Extended_Pictographic only, in order to implement UAX #29 rule GB11:
* \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
*
* The Emoji character property should not be mixed with Grapheme_Cluster_Break since they are not
* mutually exclusive (a code point may carry both).
*
* NOTE(review): the value 101 lies outside the 0..17 range used by the CLUSTER_BREAK members,
* presumably so the two kinds of property value cannot be confused; confirm against callers.
*/
exports.EXTENDED_PICTOGRAPHIC = 101;