mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2025-09-15 12:27:30 +00:00
958 lines
59 KiB
JavaScript
958 lines
59 KiB
JavaScript
"use strict";
|
|
// Module-interop helper. Reuses an `__importDefault` already present on `this`
// when there is one; otherwise defines a function that returns `mod` unchanged
// if it is flagged `__esModule`, or wraps it as `{ default: mod }` so callers
// can always read `.default`.
// `var` is kept deliberately: unlike `const`, it tolerates the same helper
// being declared more than once in one scope.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
const boundaries_1 = require("./boundaries");
|
|
const GraphemerHelper_1 = __importDefault(require("./GraphemerHelper"));
|
|
const GraphemerIterator_1 = __importDefault(require("./GraphemerIterator"));
|
|
class Graphemer {
|
|
/**
 * Returns the next grapheme break in the string after the given index.
 * @param {string} string
 * @param {number} index
 * @returns {number}
 */
|
|
static nextBreak(string, index) {
|
|
if (index === undefined) {
|
|
index = 0;
|
|
}
|
|
if (index < 0) {
|
|
return 0;
|
|
}
|
|
if (index >= string.length - 1) {
|
|
return string.length;
|
|
}
|
|
const prevCP = GraphemerHelper_1.default.codePointAt(string, index);
|
|
const prev = Graphemer.getGraphemeBreakProperty(prevCP);
|
|
const prevEmoji = Graphemer.getEmojiProperty(prevCP);
|
|
const mid = [];
|
|
const midEmoji = [];
|
|
for (let i = index + 1; i < string.length; i++) {
|
|
// check for already processed low surrogates
|
|
if (GraphemerHelper_1.default.isSurrogate(string, i - 1)) {
|
|
continue;
|
|
}
|
|
const nextCP = GraphemerHelper_1.default.codePointAt(string, i);
|
|
const next = Graphemer.getGraphemeBreakProperty(nextCP);
|
|
const nextEmoji = Graphemer.getEmojiProperty(nextCP);
|
|
if (GraphemerHelper_1.default.shouldBreak(prev, mid, next, prevEmoji, midEmoji, nextEmoji)) {
|
|
return i;
|
|
}
|
|
mid.push(next);
|
|
midEmoji.push(nextEmoji);
|
|
}
|
|
return string.length;
|
|
}
|
|
/**
 * Breaks the given string into an array of grapheme clusters.
 * @param {string} str
 * @returns {string[]}
 */
|
|
splitGraphemes(str) {
|
|
const res = [];
|
|
let index = 0;
|
|
let brk;
|
|
while ((brk = Graphemer.nextBreak(str, index)) < str.length) {
|
|
res.push(str.slice(index, brk));
|
|
index = brk;
|
|
}
|
|
if (index < str.length) {
|
|
res.push(str.slice(index));
|
|
}
|
|
return res;
|
|
}
|
|
/**
 * Returns an iterator of grapheme clusters in the given string.
 * @param {string} str
 * @returns {GraphemerIterator}
 */
|
|
iterateGraphemes(str) {
|
|
return new GraphemerIterator_1.default(str, Graphemer.nextBreak);
|
|
}
|
|
/**
 * Returns the number of grapheme clusters in the given string.
 * @param {string} str
 * @returns {number}
 */
|
|
countGraphemes(str) {
|
|
let count = 0;
|
|
let index = 0;
|
|
let brk;
|
|
while ((brk = Graphemer.nextBreak(str, index)) < str.length) {
|
|
index = brk;
|
|
count++;
|
|
}
|
|
if (index < str.length) {
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
/**
 * Given a Unicode code point, determines this symbol's grapheme break property.
 * @param {number} code Unicode code point
 * @returns {number}
 */
|
|
static getGraphemeBreakProperty(code) {
|
|
// Grapheme break property taken from:
|
|
// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
|
|
// and generated by
|
|
// node ./scripts/generate-grapheme-break.js
|
|
if (code < 0xbf09) {
|
|
if (code < 0xac54) {
|
|
if (code < 0x102d) {
|
|
if (code < 0xb02) {
|
|
if (code < 0x93b) {
|
|
if (code < 0x6df) {
|
|
if (code < 0x5bf) {
|
|
if (code < 0x7f) {
|
|
if (code < 0xb) {
|
|
if (code < 0xa) {
|
|
// Cc [10] <control-0000>..<control-0009>
|
|
if (0x0 <= code && code <= 0x9) {
|
|
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
|
}
|
|
}
|
|
else {
|
|
// Cc <control-000A>
|
|
if (0xa === code) {
|
|
return boundaries_1.CLUSTER_BREAK.LF;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xd) {
|
|
// Cc [2] <control-000B>..<control-000C>
|
|
if (0xb <= code && code <= 0xc) {
|
|
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xe) {
|
|
// Cc <control-000D>
|
|
if (0xd === code) {
|
|
return boundaries_1.CLUSTER_BREAK.CR;
|
|
}
|
|
}
|
|
else {
|
|
// Cc [18] <control-000E>..<control-001F>
|
|
if (0xe <= code && code <= 0x1f) {
|
|
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x300) {
|
|
if (code < 0xad) {
|
|
// Cc [33] <control-007F>..<control-009F>
|
|
if (0x7f <= code && code <= 0x9f) {
|
|
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
|
}
|
|
}
|
|
else {
|
|
// Cf SOFT HYPHEN
|
|
if (0xad === code) {
|
|
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x483) {
|
|
// Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
|
|
if (0x300 <= code && code <= 0x36f) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x591) {
|
|
// Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
|
|
// Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
|
if (0x483 <= code && code <= 0x489) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
|
|
if (0x591 <= code && code <= 0x5bd) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x610) {
|
|
if (code < 0x5c4) {
|
|
if (code < 0x5c1) {
|
|
// Mn HEBREW POINT RAFE
|
|
if (0x5bf === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
|
|
if (0x5c1 <= code && code <= 0x5c2) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x5c7) {
|
|
// Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
|
if (0x5c4 <= code && code <= 0x5c5) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x600) {
|
|
// Mn HEBREW POINT QAMATS QATAN
|
|
if (0x5c7 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
|
if (0x600 <= code && code <= 0x605) {
|
|
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x670) {
|
|
if (code < 0x61c) {
|
|
// Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
|
if (0x610 <= code && code <= 0x61a) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x64b) {
|
|
// Cf ARABIC LETTER MARK
|
|
if (0x61c === code) {
|
|
return boundaries_1.CLUSTER_BREAK.CONTROL;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
|
|
if (0x64b <= code && code <= 0x65f) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x6d6) {
|
|
// Mn ARABIC LETTER SUPERSCRIPT ALEF
|
|
if (0x670 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x6dd) {
|
|
// Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
|
if (0x6d6 <= code && code <= 0x6dc) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Cf ARABIC END OF AYAH
|
|
if (0x6dd === code) {
|
|
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x81b) {
|
|
if (code < 0x730) {
|
|
if (code < 0x6ea) {
|
|
if (code < 0x6e7) {
|
|
// Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
|
if (0x6df <= code && code <= 0x6e4) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
|
if (0x6e7 <= code && code <= 0x6e8) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x70f) {
|
|
// Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
|
if (0x6ea <= code && code <= 0x6ed) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Cf SYRIAC ABBREVIATION MARK
|
|
if (0x70f === code) {
|
|
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
|
}
|
|
// Mn SYRIAC LETTER SUPERSCRIPT ALAPH
|
|
if (0x711 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x7eb) {
|
|
if (code < 0x7a6) {
|
|
// Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
|
if (0x730 <= code && code <= 0x74a) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [11] THAANA ABAFILI..THAANA SUKUN
|
|
if (0x7a6 <= code && code <= 0x7b0) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x7fd) {
|
|
// Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
|
if (0x7eb <= code && code <= 0x7f3) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x816) {
|
|
// Mn NKO DANTAYALAN
|
|
if (0x7fd === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
|
|
if (0x816 <= code && code <= 0x819) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x898) {
|
|
if (code < 0x829) {
|
|
if (code < 0x825) {
|
|
// Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
|
if (0x81b <= code && code <= 0x823) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
|
if (0x825 <= code && code <= 0x827) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x859) {
|
|
// Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
|
if (0x829 <= code && code <= 0x82d) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x890) {
|
|
// Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
|
if (0x859 <= code && code <= 0x85b) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
|
|
if (0x890 <= code && code <= 0x891) {
|
|
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x8e3) {
|
|
if (code < 0x8ca) {
|
|
// Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
|
|
if (0x898 <= code && code <= 0x89f) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x8e2) {
|
|
// Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
|
|
if (0x8ca <= code && code <= 0x8e1) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Cf ARABIC DISPUTED END OF AYAH
|
|
if (0x8e2 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.PREPEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x903) {
|
|
// Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
|
|
if (0x8e3 <= code && code <= 0x902) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mc DEVANAGARI SIGN VISARGA
|
|
if (0x903 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
// Mn DEVANAGARI VOWEL SIGN OE
|
|
if (0x93a === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa01) {
|
|
if (code < 0x982) {
|
|
if (code < 0x94d) {
|
|
if (code < 0x93e) {
|
|
// Mc DEVANAGARI VOWEL SIGN OOE
|
|
if (0x93b === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
// Mn DEVANAGARI SIGN NUKTA
|
|
if (0x93c === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x941) {
|
|
// Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
|
if (0x93e <= code && code <= 0x940) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x949) {
|
|
// Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
|
if (0x941 <= code && code <= 0x948) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
|
if (0x949 <= code && code <= 0x94c) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x951) {
|
|
if (code < 0x94e) {
|
|
// Mn DEVANAGARI SIGN VIRAMA
|
|
if (0x94d === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
|
|
if (0x94e <= code && code <= 0x94f) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x962) {
|
|
// Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
|
|
if (0x951 <= code && code <= 0x957) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x981) {
|
|
// Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
|
if (0x962 <= code && code <= 0x963) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn BENGALI SIGN CANDRABINDU
|
|
if (0x981 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9c7) {
|
|
if (code < 0x9be) {
|
|
if (code < 0x9bc) {
|
|
// Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
|
if (0x982 <= code && code <= 0x983) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn BENGALI SIGN NUKTA
|
|
if (0x9bc === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9bf) {
|
|
// Mc BENGALI VOWEL SIGN AA
|
|
if (0x9be === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9c1) {
|
|
// Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
|
|
if (0x9bf <= code && code <= 0x9c0) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
|
|
if (0x9c1 <= code && code <= 0x9c4) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9d7) {
|
|
if (code < 0x9cb) {
|
|
// Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
|
|
if (0x9c7 <= code && code <= 0x9c8) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9cd) {
|
|
// Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
|
|
if (0x9cb <= code && code <= 0x9cc) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn BENGALI SIGN VIRAMA
|
|
if (0x9cd === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9e2) {
|
|
// Mc BENGALI AU LENGTH MARK
|
|
if (0x9d7 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0x9fe) {
|
|
// Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
|
|
if (0x9e2 <= code && code <= 0x9e3) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn BENGALI SANDHI MARK
|
|
if (0x9fe === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa83) {
|
|
if (code < 0xa47) {
|
|
if (code < 0xa3c) {
|
|
if (code < 0xa03) {
|
|
// Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
|
|
if (0xa01 <= code && code <= 0xa02) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mc GURMUKHI SIGN VISARGA
|
|
if (0xa03 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa3e) {
|
|
// Mn GURMUKHI SIGN NUKTA
|
|
if (0xa3c === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa41) {
|
|
// Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
|
|
if (0xa3e <= code && code <= 0xa40) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
|
|
if (0xa41 <= code && code <= 0xa42) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa70) {
|
|
if (code < 0xa4b) {
|
|
// Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
|
|
if (0xa47 <= code && code <= 0xa48) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa51) {
|
|
// Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
|
|
if (0xa4b <= code && code <= 0xa4d) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn GURMUKHI SIGN UDAAT
|
|
if (0xa51 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa75) {
|
|
// Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
|
|
if (0xa70 <= code && code <= 0xa71) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xa81) {
|
|
// Mn GURMUKHI SIGN YAKASH
|
|
if (0xa75 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
|
|
if (0xa81 <= code && code <= 0xa82) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xac9) {
|
|
if (code < 0xabe) {
|
|
// Mc GUJARATI SIGN VISARGA
|
|
if (0xa83 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
// Mn GUJARATI SIGN NUKTA
|
|
if (0xabc === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xac1) {
|
|
// Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
|
|
if (0xabe <= code && code <= 0xac0) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xac7) {
|
|
// Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
|
|
if (0xac1 <= code && code <= 0xac5) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
|
|
if (0xac7 <= code && code <= 0xac8) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xae2) {
|
|
if (code < 0xacb) {
|
|
// Mc GUJARATI VOWEL SIGN CANDRA O
|
|
if (0xac9 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xacd) {
|
|
// Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
|
|
if (0xacb <= code && code <= 0xacc) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn GUJARATI SIGN VIRAMA
|
|
if (0xacd === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xafa) {
|
|
// Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
|
|
if (0xae2 <= code && code <= 0xae3) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xb01) {
|
|
// Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
|
|
if (0xafa <= code && code <= 0xaff) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn ORIYA SIGN CANDRABINDU
|
|
if (0xb01 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xcf3) {
|
|
if (code < 0xc04) {
|
|
if (code < 0xb82) {
|
|
if (code < 0xb47) {
|
|
if (code < 0xb3e) {
|
|
if (code < 0xb3c) {
|
|
// Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
|
|
if (0xb02 <= code && code <= 0xb03) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn ORIYA SIGN NUKTA
|
|
if (0xb3c === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xb40) {
|
|
// Mc ORIYA VOWEL SIGN AA
|
|
// Mn ORIYA VOWEL SIGN I
|
|
if (0xb3e <= code && code <= 0xb3f) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xb41) {
|
|
// Mc ORIYA VOWEL SIGN II
|
|
if (0xb40 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
|
|
if (0xb41 <= code && code <= 0xb44) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xb4d) {
|
|
if (code < 0xb4b) {
|
|
// Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
|
|
if (0xb47 <= code && code <= 0xb48) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
|
|
if (0xb4b <= code && code <= 0xb4c) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xb55) {
|
|
// Mn ORIYA SIGN VIRAMA
|
|
if (0xb4d === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xb62) {
|
|
// Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
|
|
// Mc ORIYA AU LENGTH MARK
|
|
if (0xb55 <= code && code <= 0xb57) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
|
|
if (0xb62 <= code && code <= 0xb63) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xbc6) {
|
|
if (code < 0xbbf) {
|
|
// Mn TAMIL SIGN ANUSVARA
|
|
if (0xb82 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
// Mc TAMIL VOWEL SIGN AA
|
|
if (0xbbe === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xbc0) {
|
|
// Mc TAMIL VOWEL SIGN I
|
|
if (0xbbf === code) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xbc1) {
|
|
// Mn TAMIL VOWEL SIGN II
|
|
if (0xbc0 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
|
|
if (0xbc1 <= code && code <= 0xbc2) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xbd7) {
|
|
if (code < 0xbca) {
|
|
// Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
|
|
if (0xbc6 <= code && code <= 0xbc8) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xbcd) {
|
|
// Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
|
|
if (0xbca <= code && code <= 0xbcc) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
else {
|
|
// Mn TAMIL SIGN VIRAMA
|
|
if (0xbcd === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xc00) {
|
|
// Mc TAMIL AU LENGTH MARK
|
|
if (0xbd7 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xc01) {
|
|
// Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
|
|
if (0xc00 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
// Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
|
|
if (0xc01 <= code && code <= 0xc03) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xcbe) {
|
|
if (code < 0xc4a) {
|
|
if (code < 0xc3e) {
|
|
// Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
|
|
if (0xc04 === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
// Mn TELUGU SIGN NUKTA
|
|
if (0xc3c === code) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xc41) {
|
|
// Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
|
if (0xc3e <= code && code <= 0xc40) {
|
|
return boundaries_1.CLUSTER_BREAK.EXTEND;
|
|
}
|
|
}
|
|
else {
|
|
if (code < 0xc46) {
|
|
// Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
|
|
if (0xc41 <= code && code <= 0xc44) {
|
|
return boundaries_1.CLUSTER_BREAK.SPACINGMARK;
|
|
|