mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-02-04 05:50:50 +00:00
nhj
more
This commit is contained in:
958
unified-ai-platform/node_modules/graphemer/lib/Graphemer.js
generated
vendored
Normal file
958
unified-ai-platform/node_modules/graphemer/lib/Graphemer.js
generated
vendored
Normal file
@@ -0,0 +1,958 @@
|
||||
"use strict";
|
||||
// Interop helper in the form emitted by the TypeScript compiler.
// Reuses an already-installed `this.__importDefault` when one exists; otherwise
// wraps a required module so that `.default` is always available:
//   - a module flagged with `__esModule` is returned unchanged
//   - anything else (including undefined/null) is wrapped as { default: mod }
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
const boundaries_1 = require("./boundaries");
|
||||
const GraphemerHelper_1 = __importDefault(require("./GraphemerHelper"));
|
||||
const GraphemerIterator_1 = __importDefault(require("./GraphemerIterator"));
|
||||
class Graphemer {
|
||||
/**
|
||||
* Returns the next grapheme break in the string after the given index
|
||||
* @param string {string}
|
||||
* @param index {number}
|
||||
* @returns {number}
|
||||
*/
|
||||
static nextBreak(string, index) {
|
||||
if (index === undefined) {
|
||||
index = 0;
|
||||
}
|
||||
if (index < 0) {
|
||||
return 0;
|
||||
}
|
||||
if (index >= string.length - 1) {
|
||||
return string.length;
|
||||
}
|
||||
const prevCP = GraphemerHelper_1.default.codePointAt(string, index);
|
||||
const prev = Graphemer.getGraphemeBreakProperty(prevCP);
|
||||
const prevEmoji = Graphemer.getEmojiProperty(prevCP);
|
||||
const mid = [];
|
||||
const midEmoji = [];
|
||||
for (let i = index + 1; i < string.length; i++) {
|
||||
// check for already processed low surrogates
|
||||
if (GraphemerHelper_1.default.isSurrogate(string, i - 1)) {
|
||||
continue;
|
||||
}
|
||||
const nextCP = GraphemerHelper_1.default.codePointAt(string, i);
|
||||
const next = Graphemer.getGraphemeBreakProperty(nextCP);
|
||||
const nextEmoji = Graphemer.getEmojiProperty(nextCP);
|
||||
if (GraphemerHelper_1.default.shouldBreak(prev, mid, next, prevEmoji, midEmoji, nextEmoji)) {
|
||||
return i;
|
||||
}
|
||||
mid.push(next);
|
||||
midEmoji.push(nextEmoji);
|
||||
}
|
||||
return string.length;
|
||||
}
|
||||
/**
|
||||
* Breaks the given string into an array of grapheme clusters
|
||||
* @param str {string}
|
||||
* @returns {string[]}
|
||||
*/
|
||||
splitGraphemes(str) {
|
||||
const res = [];
|
||||
let index = 0;
|
||||
let brk;
|
||||
while ((brk = Graphemer.nextBreak(str, index)) < str.length) {
|
||||
res.push(str.slice(index, brk));
|
||||
index = brk;
|
||||
}
|
||||
if (index < str.length) {
|
||||
res.push(str.slice(index));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
/**
|
||||
* Returns an iterator of grapheme clusters in the given string
|
||||
* @param str {string}
|
||||
* @returns {GraphemerIterator}
|
||||
*/
|
||||
iterateGraphemes(str) {
|
||||
return new GraphemerIterator_1.default(str, Graphemer.nextBreak);
|
||||
}
|
||||
/**
|
||||
* Returns the number of grapheme clusters in the given string
|
||||
* @param str {string}
|
||||
* @returns {number}
|
||||
*/
|
||||
countGraphemes(str) {
|
||||
let count = 0;
|
||||
let index = 0;
|
||||
let brk;
|
||||
while ((brk = Graphemer.nextBreak(str, index)) < str.length) {
|
||||
index = brk;
|
||||
count++;
|
||||
}
|
||||
if (index < str.length) {
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
/**
|
||||
* Given a Unicode code point, determines this symbol's grapheme break property
|
||||
* @param code {number} Unicode code point
|
||||
* @returns {number}
|
||||
*/
|
||||
static getGraphemeBreakProperty(code) {
|
||||
// Grapheme break property taken from:
|
||||
// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
|
||||
// and generated by
|
||||
// node ./scripts/generate-grapheme-break.js
|
||||
if (code < 0xbf09) {
|
||||
if (code < 0xac54) {
|
||||
if (code < 0x102d) {
|
||||
if (code < 0xb02) {
|
||||
if (code < 0x93b) {
|
||||
if (code < 0x6df) {
|
||||
if (code < 0x5bf) {
|
||||
if (code < 0x7f) {
|
||||
if (code < 0xb) {
|
||||
if (code < 0xa) {
|
||||
// Cc [10] <control-0000>..<control-0009>
|
||||
if (0x0 <= code && code <= 0x9) {
|
||||
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cc <control-000A>
|
||||
if (0xa === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.LF;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xd) {
|
||||
// Cc [2] <control-000B>..<control-000C>
|
||||
if (0xb <= code && code <= 0xc) {
|
||||
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xe) {
|
||||
// Cc <control-000D>
|
||||
if (0xd === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.CR;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cc [18] <control-000E>..<control-001F>
|
||||
if (0xe <= code && code <= 0x1f) {
|
||||
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x300) {
|
||||
if (code < 0xad) {
|
||||
// Cc [33] <control-007F>..<control-009F>
|
||||
if (0x7f <= code && code <= 0x9f) {
|
||||
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cf SOFT HYPHEN
|
||||
if (0xad === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x483) {
|
||||
// Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
|
||||
if (0x300 <= code && code <= 0x36f) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x591) {
|
||||
// Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
|
||||
// Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
if (0x483 <= code && code <= 0x489) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
|
||||
if (0x591 <= code && code <= 0x5bd) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x610) {
|
||||
if (code < 0x5c4) {
|
||||
if (code < 0x5c1) {
|
||||
// Mn HEBREW POINT RAFE
|
||||
if (0x5bf === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
|
||||
if (0x5c1 <= code && code <= 0x5c2) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x5c7) {
|
||||
// Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
||||
if (0x5c4 <= code && code <= 0x5c5) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x600) {
|
||||
// Mn HEBREW POINT QAMATS QATAN
|
||||
if (0x5c7 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
||||
if (0x600 <= code && code <= 0x605) {
|
||||
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x670) {
|
||||
if (code < 0x61c) {
|
||||
// Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
if (0x610 <= code && code <= 0x61a) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x64b) {
|
||||
// Cf ARABIC LETTER MARK
|
||||
if (0x61c === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
|
||||
if (0x64b <= code && code <= 0x65f) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x6d6) {
|
||||
// Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
if (0x670 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x6dd) {
|
||||
// Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
if (0x6d6 <= code && code <= 0x6dc) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cf ARABIC END OF AYAH
|
||||
if (0x6dd === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x81b) {
|
||||
if (code < 0x730) {
|
||||
if (code < 0x6ea) {
|
||||
if (code < 0x6e7) {
|
||||
// Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
if (0x6df <= code && code <= 0x6e4) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
if (0x6e7 <= code && code <= 0x6e8) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x70f) {
|
||||
// Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
||||
if (0x6ea <= code && code <= 0x6ed) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cf SYRIAC ABBREVIATION MARK
|
||||
if (0x70f === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
||||
}
|
||||
// Mn SYRIAC LETTER SUPERSCRIPT ALAPH
|
||||
if (0x711 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x7eb) {
|
||||
if (code < 0x7a6) {
|
||||
// Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
if (0x730 <= code && code <= 0x74a) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [11] THAANA ABAFILI..THAANA SUKUN
|
||||
if (0x7a6 <= code && code <= 0x7b0) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x7fd) {
|
||||
// Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
if (0x7eb <= code && code <= 0x7f3) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x816) {
|
||||
// Mn NKO DANTAYALAN
|
||||
if (0x7fd === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
|
||||
if (0x816 <= code && code <= 0x819) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x898) {
|
||||
if (code < 0x829) {
|
||||
if (code < 0x825) {
|
||||
// Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
if (0x81b <= code && code <= 0x823) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
if (0x825 <= code && code <= 0x827) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x859) {
|
||||
// Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
if (0x829 <= code && code <= 0x82d) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x890) {
|
||||
// Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
if (0x859 <= code && code <= 0x85b) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
|
||||
if (0x890 <= code && code <= 0x891) {
|
||||
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x8e3) {
|
||||
if (code < 0x8ca) {
|
||||
// Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
|
||||
if (0x898 <= code && code <= 0x89f) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x8e2) {
|
||||
// Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
|
||||
if (0x8ca <= code && code <= 0x8e1) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Cf ARABIC DISPUTED END OF AYAH
|
||||
if (0x8e2 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x903) {
|
||||
// Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
|
||||
if (0x8e3 <= code && code <= 0x902) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc DEVANAGARI SIGN VISARGA
|
||||
if (0x903 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
// Mn DEVANAGARI VOWEL SIGN OE
|
||||
if (0x93a === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa01) {
|
||||
if (code < 0x982) {
|
||||
if (code < 0x94d) {
|
||||
if (code < 0x93e) {
|
||||
// Mc DEVANAGARI VOWEL SIGN OOE
|
||||
if (0x93b === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
// Mn DEVANAGARI SIGN NUKTA
|
||||
if (0x93c === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x941) {
|
||||
// Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
if (0x93e <= code && code <= 0x940) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x949) {
|
||||
// Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
if (0x941 <= code && code <= 0x948) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
if (0x949 <= code && code <= 0x94c) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x951) {
|
||||
if (code < 0x94e) {
|
||||
// Mn DEVANAGARI SIGN VIRAMA
|
||||
if (0x94d === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
|
||||
if (0x94e <= code && code <= 0x94f) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x962) {
|
||||
// Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
|
||||
if (0x951 <= code && code <= 0x957) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x981) {
|
||||
// Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
if (0x962 <= code && code <= 0x963) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn BENGALI SIGN CANDRABINDU
|
||||
if (0x981 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9c7) {
|
||||
if (code < 0x9be) {
|
||||
if (code < 0x9bc) {
|
||||
// Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
if (0x982 <= code && code <= 0x983) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn BENGALI SIGN NUKTA
|
||||
if (0x9bc === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9bf) {
|
||||
// Mc BENGALI VOWEL SIGN AA
|
||||
if (0x9be === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9c1) {
|
||||
// Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
|
||||
if (0x9bf <= code && code <= 0x9c0) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
|
||||
if (0x9c1 <= code && code <= 0x9c4) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9d7) {
|
||||
if (code < 0x9cb) {
|
||||
// Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
|
||||
if (0x9c7 <= code && code <= 0x9c8) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9cd) {
|
||||
// Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
|
||||
if (0x9cb <= code && code <= 0x9cc) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn BENGALI SIGN VIRAMA
|
||||
if (0x9cd === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9e2) {
|
||||
// Mc BENGALI AU LENGTH MARK
|
||||
if (0x9d7 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0x9fe) {
|
||||
// Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
|
||||
if (0x9e2 <= code && code <= 0x9e3) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn BENGALI SANDHI MARK
|
||||
if (0x9fe === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa83) {
|
||||
if (code < 0xa47) {
|
||||
if (code < 0xa3c) {
|
||||
if (code < 0xa03) {
|
||||
// Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
|
||||
if (0xa01 <= code && code <= 0xa02) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc GURMUKHI SIGN VISARGA
|
||||
if (0xa03 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa3e) {
|
||||
// Mn GURMUKHI SIGN NUKTA
|
||||
if (0xa3c === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa41) {
|
||||
// Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
|
||||
if (0xa3e <= code && code <= 0xa40) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
|
||||
if (0xa41 <= code && code <= 0xa42) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa70) {
|
||||
if (code < 0xa4b) {
|
||||
// Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
|
||||
if (0xa47 <= code && code <= 0xa48) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa51) {
|
||||
// Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
|
||||
if (0xa4b <= code && code <= 0xa4d) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn GURMUKHI SIGN UDAAT
|
||||
if (0xa51 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa75) {
|
||||
// Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
|
||||
if (0xa70 <= code && code <= 0xa71) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xa81) {
|
||||
// Mn GURMUKHI SIGN YAKASH
|
||||
if (0xa75 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
|
||||
if (0xa81 <= code && code <= 0xa82) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xac9) {
|
||||
if (code < 0xabe) {
|
||||
// Mc GUJARATI SIGN VISARGA
|
||||
if (0xa83 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
// Mn GUJARATI SIGN NUKTA
|
||||
if (0xabc === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xac1) {
|
||||
// Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
|
||||
if (0xabe <= code && code <= 0xac0) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xac7) {
|
||||
// Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
|
||||
if (0xac1 <= code && code <= 0xac5) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
|
||||
if (0xac7 <= code && code <= 0xac8) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xae2) {
|
||||
if (code < 0xacb) {
|
||||
// Mc GUJARATI VOWEL SIGN CANDRA O
|
||||
if (0xac9 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xacd) {
|
||||
// Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
|
||||
if (0xacb <= code && code <= 0xacc) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn GUJARATI SIGN VIRAMA
|
||||
if (0xacd === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xafa) {
|
||||
// Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
|
||||
if (0xae2 <= code && code <= 0xae3) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xb01) {
|
||||
// Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
|
||||
if (0xafa <= code && code <= 0xaff) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn ORIYA SIGN CANDRABINDU
|
||||
if (0xb01 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xcf3) {
|
||||
if (code < 0xc04) {
|
||||
if (code < 0xb82) {
|
||||
if (code < 0xb47) {
|
||||
if (code < 0xb3e) {
|
||||
if (code < 0xb3c) {
|
||||
// Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
|
||||
if (0xb02 <= code && code <= 0xb03) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn ORIYA SIGN NUKTA
|
||||
if (0xb3c === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xb40) {
|
||||
// Mc ORIYA VOWEL SIGN AA
|
||||
// Mn ORIYA VOWEL SIGN I
|
||||
if (0xb3e <= code && code <= 0xb3f) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xb41) {
|
||||
// Mc ORIYA VOWEL SIGN II
|
||||
if (0xb40 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
|
||||
if (0xb41 <= code && code <= 0xb44) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xb4d) {
|
||||
if (code < 0xb4b) {
|
||||
// Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
|
||||
if (0xb47 <= code && code <= 0xb48) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
|
||||
if (0xb4b <= code && code <= 0xb4c) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xb55) {
|
||||
// Mn ORIYA SIGN VIRAMA
|
||||
if (0xb4d === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xb62) {
|
||||
// Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
|
||||
// Mc ORIYA AU LENGTH MARK
|
||||
if (0xb55 <= code && code <= 0xb57) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
|
||||
if (0xb62 <= code && code <= 0xb63) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xbc6) {
|
||||
if (code < 0xbbf) {
|
||||
// Mn TAMIL SIGN ANUSVARA
|
||||
if (0xb82 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
// Mc TAMIL VOWEL SIGN AA
|
||||
if (0xbbe === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xbc0) {
|
||||
// Mc TAMIL VOWEL SIGN I
|
||||
if (0xbbf === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xbc1) {
|
||||
// Mn TAMIL VOWEL SIGN II
|
||||
if (0xbc0 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
|
||||
if (0xbc1 <= code && code <= 0xbc2) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xbd7) {
|
||||
if (code < 0xbca) {
|
||||
// Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
|
||||
if (0xbc6 <= code && code <= 0xbc8) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xbcd) {
|
||||
// Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
|
||||
if (0xbca <= code && code <= 0xbcc) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mn TAMIL SIGN VIRAMA
|
||||
if (0xbcd === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xc00) {
|
||||
// Mc TAMIL AU LENGTH MARK
|
||||
if (0xbd7 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xc01) {
|
||||
// Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
|
||||
if (0xc00 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
|
||||
if (0xc01 <= code && code <= 0xc03) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xcbe) {
|
||||
if (code < 0xc4a) {
|
||||
if (code < 0xc3e) {
|
||||
// Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
|
||||
if (0xc04 === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
// Mn TELUGU SIGN NUKTA
|
||||
if (0xc3c === code) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xc41) {
|
||||
// Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
if (0xc3e <= code && code <= 0xc40) {
|
||||
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (code < 0xc46) {
|
||||
// Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
|
||||
if (0xc41 <= code && code <= 0xc44) {
|
||||
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
||||
|
||||
38
unified-ai-platform/node_modules/graphemer/lib/boundaries.js
generated
vendored
Normal file
38
unified-ai-platform/node_modules/graphemer/lib/boundaries.js
generated
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
"use strict";
|
||||
/**
|
||||
* The Grapheme_Cluster_Break property value
|
||||
* @see https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.EXTENDED_PICTOGRAPHIC = exports.CLUSTER_BREAK = void 0;
|
||||
// Enum of Grapheme_Cluster_Break property values, in the two-way form emitted
// by the TypeScript compiler: every statement below installs both the
// name -> number entry and the reverse number -> name entry on the same object.
// The object is published as exports.CLUSTER_BREAK (an existing truthy value
// there is reused rather than replaced).
// The numeric values are visible to every consumer of this module, so they
// must not be renumbered or reordered.
var CLUSTER_BREAK;
(function (CLUSTER_BREAK) {
    CLUSTER_BREAK[CLUSTER_BREAK["CR"] = 0] = "CR";
    CLUSTER_BREAK[CLUSTER_BREAK["LF"] = 1] = "LF";
    CLUSTER_BREAK[CLUSTER_BREAK["CONTROL"] = 2] = "CONTROL";
    CLUSTER_BREAK[CLUSTER_BREAK["EXTEND"] = 3] = "EXTEND";
    CLUSTER_BREAK[CLUSTER_BREAK["REGIONAL_INDICATOR"] = 4] = "REGIONAL_INDICATOR";
    CLUSTER_BREAK[CLUSTER_BREAK["SPACINGMARK"] = 5] = "SPACINGMARK";
    CLUSTER_BREAK[CLUSTER_BREAK["L"] = 6] = "L";
    CLUSTER_BREAK[CLUSTER_BREAK["V"] = 7] = "V";
    CLUSTER_BREAK[CLUSTER_BREAK["T"] = 8] = "T";
    CLUSTER_BREAK[CLUSTER_BREAK["LV"] = 9] = "LV";
    CLUSTER_BREAK[CLUSTER_BREAK["LVT"] = 10] = "LVT";
    CLUSTER_BREAK[CLUSTER_BREAK["OTHER"] = 11] = "OTHER";
    CLUSTER_BREAK[CLUSTER_BREAK["PREPEND"] = 12] = "PREPEND";
    CLUSTER_BREAK[CLUSTER_BREAK["E_BASE"] = 13] = "E_BASE";
    CLUSTER_BREAK[CLUSTER_BREAK["E_MODIFIER"] = 14] = "E_MODIFIER";
    CLUSTER_BREAK[CLUSTER_BREAK["ZWJ"] = 15] = "ZWJ";
    CLUSTER_BREAK[CLUSTER_BREAK["GLUE_AFTER_ZWJ"] = 16] = "GLUE_AFTER_ZWJ";
    CLUSTER_BREAK[CLUSTER_BREAK["E_BASE_GAZ"] = 17] = "E_BASE_GAZ";
})(CLUSTER_BREAK = exports.CLUSTER_BREAK || (exports.CLUSTER_BREAK = {}));
|
||||
/**
|
||||
* The Emoji character property is an extension of UCD but shares the same namespace and structure
|
||||
* @see http://www.unicode.org/reports/tr51/tr51-14.html#Emoji_Properties_and_Data_Files
|
||||
*
|
||||
* Here we model Extended_Pictographic only to implement UAX #29 GB11
|
||||
* \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
|
||||
*
|
||||
* The Emoji character property should not be mixed with Grapheme_Cluster_Break since they are not exclusive
|
||||
*/
|
||||
exports.EXTENDED_PICTOGRAPHIC = 101;
|
||||
Reference in New Issue
Block a user