From 754277d1468782dcb8f3b77b57f128b83c4acffb Mon Sep 17 00:00:00 2001 From: scossu Date: Thu, 29 Jan 2026 22:19:16 -0500 Subject: [PATCH 1/2] Add many language tests; some table updates. --- scriptshifter/tables/data/_ignore_base.yml | 45 ++- .../tables/data/arabic 2025-12-01.yml | 327 +++++++++++++++++ scriptshifter/tables/data/arabic.yml | 127 ++++--- scriptshifter/tables/data/bodo_bengali.yml | 19 + scriptshifter/tables/data/bodo_devanagari.yml | 19 + .../tables/data/chuvash_cyrillic.yml | 28 +- scriptshifter/tables/data/coptic.yml | 341 ++++++++++++++++++ scriptshifter/tables/data/glagolitic.yml | 221 ++++++++++++ scriptshifter/tables/data/inuktitut.yml | 314 ++++++++++++++++ scriptshifter/tables/data/pali.yml | 18 - scriptshifter/tables/index.yml | 111 +++--- test/data/script_samples/_bengali_base.csv | 6 + test/data/script_samples/_ethiopic_base.csv | 41 +++ test/data/script_samples/_gurmukhi_base.csv | 14 + test/data/script_samples/abazin_cyrillic.csv | 2 + test/data/script_samples/abkhaz_cyrillic.csv | 2 + test/data/script_samples/adygei_cyrillic.csv | 2 + test/data/script_samples/altai_cyrillic.csv | 2 + test/data/script_samples/amharic.csv | 41 +++ test/data/script_samples/arabic.csv | 20 +- test/data/script_samples/argobba_ethiopic.csv | 41 +++ test/data/script_samples/armenian.csv | 2 + test/data/script_samples/assamese.csv | 4 + test/data/script_samples/avaric_cyrillic.csv | 2 + .../data/script_samples/awadhi_devanagari.csv | 8 + .../script_samples/azerbaijani_cyrillic.csv | 2 + test/data/script_samples/balkar_cyrillic.csv | 1 + test/data/script_samples/bashkir_cyrillic.csv | 2 + test/data/script_samples/belarusian.csv | 2 + test/data/script_samples/bengali.csv | 6 + .../data/script_samples/bihari_devanagari.csv | 8 + test/data/script_samples/bodo_bengali.csv | 6 + test/data/script_samples/bodo_devanagari.csv | 7 + test/data/script_samples/braj_devanagari.csv | 6 + test/data/script_samples/bulgarian.csv | 2 + test/data/script_samples/buriat.csv | 2 + 
test/data/script_samples/buriat_cyrillic.csv | 2 + .../script_samples/buriat_mongol_bichig.csv | 6 + test/data/script_samples/burmese.csv | 53 +-- test/data/script_samples/chechen_cyrillic.csv | 2 + test/data/script_samples/chukchi_cyrillic.csv | 2 + test/data/script_samples/church_slavonic.csv | 1 + test/data/script_samples/chuvash_cyrillic.csv | 2 + test/data/script_samples/coptic.csv | 2 + test/data/script_samples/cyrillic_generic.csv | 8 + test/data/script_samples/dargwa_cyrillic.csv | 2 + .../script_samples/devanagari_generic.csv | 8 + test/data/script_samples/divehi_thaana.csv | 3 + test/data/script_samples/dogri_devanagari.csv | 7 + test/data/script_samples/dungan_cyrillic.csv | 2 + test/data/script_samples/dzongkha_tibetan.csv | 7 + .../script_samples/eskimo_yuit_cyrillic.csv | 2 + test/data/script_samples/ethiopic_generic.csv | 41 +++ .../script_samples/even_evenki_cyrillic.csv | 2 + test/data/script_samples/glagolitic.csv | 1 + test/data/script_samples/ignore.csv | 27 ++ test/data/script_samples/inuktitut.csv | 20 + test/data/script_samples/lepcha.csv | 4 + .../script_samples/mongolian_cyrillic.csv | 8 + .../mongolian_mongol_bichig.csv | 6 + .../new_csv_files_2025-01-29.zip | Bin 0 -> 34749 bytes test/data/script_samples/russian.csv | 8 + test/data/script_samples/tibetan.csv | 35 +- test/data/script_samples/ukrainian.csv | 2 + test/integration.py | 2 +- 65 files changed, 1819 insertions(+), 247 deletions(-) create mode 100644 scriptshifter/tables/data/arabic 2025-12-01.yml create mode 100644 scriptshifter/tables/data/bodo_bengali.yml create mode 100644 scriptshifter/tables/data/bodo_devanagari.yml create mode 100644 scriptshifter/tables/data/coptic.yml create mode 100644 scriptshifter/tables/data/glagolitic.yml create mode 100644 scriptshifter/tables/data/inuktitut.yml delete mode 100644 scriptshifter/tables/data/pali.yml create mode 100644 test/data/script_samples/_bengali_base.csv create mode 100644 test/data/script_samples/_ethiopic_base.csv create 
mode 100644 test/data/script_samples/_gurmukhi_base.csv create mode 100644 test/data/script_samples/abazin_cyrillic.csv create mode 100644 test/data/script_samples/abkhaz_cyrillic.csv create mode 100644 test/data/script_samples/adygei_cyrillic.csv create mode 100644 test/data/script_samples/altai_cyrillic.csv create mode 100644 test/data/script_samples/amharic.csv create mode 100644 test/data/script_samples/argobba_ethiopic.csv create mode 100644 test/data/script_samples/armenian.csv create mode 100644 test/data/script_samples/assamese.csv create mode 100644 test/data/script_samples/avaric_cyrillic.csv create mode 100644 test/data/script_samples/awadhi_devanagari.csv create mode 100644 test/data/script_samples/azerbaijani_cyrillic.csv create mode 100644 test/data/script_samples/balkar_cyrillic.csv create mode 100644 test/data/script_samples/bashkir_cyrillic.csv create mode 100644 test/data/script_samples/belarusian.csv create mode 100644 test/data/script_samples/bengali.csv create mode 100644 test/data/script_samples/bihari_devanagari.csv create mode 100644 test/data/script_samples/bodo_bengali.csv create mode 100644 test/data/script_samples/bodo_devanagari.csv create mode 100644 test/data/script_samples/braj_devanagari.csv create mode 100644 test/data/script_samples/bulgarian.csv create mode 100644 test/data/script_samples/buriat.csv create mode 100644 test/data/script_samples/buriat_cyrillic.csv create mode 100644 test/data/script_samples/buriat_mongol_bichig.csv create mode 100644 test/data/script_samples/chechen_cyrillic.csv create mode 100644 test/data/script_samples/chukchi_cyrillic.csv create mode 100644 test/data/script_samples/church_slavonic.csv create mode 100644 test/data/script_samples/chuvash_cyrillic.csv create mode 100644 test/data/script_samples/coptic.csv create mode 100644 test/data/script_samples/cyrillic_generic.csv create mode 100644 test/data/script_samples/dargwa_cyrillic.csv create mode 100644 test/data/script_samples/devanagari_generic.csv 
create mode 100644 test/data/script_samples/divehi_thaana.csv create mode 100644 test/data/script_samples/dogri_devanagari.csv create mode 100644 test/data/script_samples/dungan_cyrillic.csv create mode 100644 test/data/script_samples/dzongkha_tibetan.csv create mode 100644 test/data/script_samples/eskimo_yuit_cyrillic.csv create mode 100644 test/data/script_samples/ethiopic_generic.csv create mode 100644 test/data/script_samples/even_evenki_cyrillic.csv create mode 100644 test/data/script_samples/glagolitic.csv create mode 100644 test/data/script_samples/ignore.csv create mode 100644 test/data/script_samples/inuktitut.csv create mode 100644 test/data/script_samples/lepcha.csv create mode 100644 test/data/script_samples/mongolian_cyrillic.csv create mode 100644 test/data/script_samples/mongolian_mongol_bichig.csv create mode 100644 test/data/script_samples/new_csv_files_2025-01-29.zip create mode 100644 test/data/script_samples/russian.csv create mode 100644 test/data/script_samples/ukrainian.csv diff --git a/scriptshifter/tables/data/_ignore_base.yml b/scriptshifter/tables/data/_ignore_base.yml index 7ca964f..e54836c 100644 --- a/scriptshifter/tables/data/_ignore_base.yml +++ b/scriptshifter/tables/data/_ignore_base.yml @@ -8,16 +8,24 @@ general: roman_to_script: ignore: - "At head of title" - - "at head of title" - "Colophon" - - "colophon" + - "Colophon" - "Cover title" + - "On cover" + - "S.l." + - "Spine title" + - "and one other" + - "at head of title" + - "colophon" + - "cover title" - "date of publication not identified" + - "et al." + - "on cover" - "place of publication not identified" - "publisher not identified" - - "and one other" - - "and others" - - "et al." + - "s.l." + - "s.n." + - "spine title" ignore_ptn: - "and ([a-z0-9]+ )?others" @@ -29,17 +37,22 @@ roman_to_script: # dedicated U+2160÷U+216F (uppercase Roman # numerals) and/or U+2170÷U+217F (lower case Roman # numerals) ranges to avoid this ambiguity. 
- - "I{2,3}\\b" - - "I(V|X)\\b" - - "LI{1,3}\\b" - - "LI?(V|X)\\b" - - "L(V|X{1,3})I{,3}\\b" - - "LX{1,3}I?V\\b" - - "LX{1,3}VI{,3}\\b" - - "VI{1,3}\\b" - - "X{1,3}I{1,3}\\b" - - "X{1,3}I(V|X)\\b" - - "X{1,3}VI{,3}\\b" + - "M{,3}(CM)?C?D?C{1,3}L?X{,3}I{,3}\\b" + - "M{1,3}(CM)?C?D?C{,3}L?X{,3}I{,3}\\b" + - "M{,3}(CM)?C?D?C{1,3}L?X{,3}I[VX]\\b" + - "M{1,3}(CM)?C?D?C{,3}L?X{,3}I[VX]\\b" + + # May not be prefixed by M, D, C, L. Cannot use for single digits. + - "M{,3}(CM)?C?D?C{,3}I(I{,2}V|X)\\b" + - "M{,3}(CM)?C?D?C{,3}LI{1,3}\\b" + - "M{,3}(CM)?C?D?C{,3}LI?[VX]\\b" + - "M{,3}(CM)?C?D?C{,3}L(V|X{1,3})I{,3}\\b" + - "M{,3}(CM)?C?D?C{,3}LX{1,3}I?[VX]\\b" + - "M{,3}(CM)?C?D?C{,3}LX{1,3}VI{,3}\\b" + - "M{,3}(CM)?C?D?C{,3}VI{1,3}\\b" + - "M{,3}(CM)?C?D?C{,3}X{1,3}C?I{1,3}\\b" + - "M{,3}(CM)?C?D?C{,3}X{1,3}C?I[VX]\\b" + - "M{,3}(CM)?C?D?C{,3}X{1,3}C?VI{,3}\\b" # MARC sub-field markers. - "[\u2021\u01C2\\$][0-9a-z]\\b" diff --git a/scriptshifter/tables/data/arabic 2025-12-01.yml b/scriptshifter/tables/data/arabic 2025-12-01.yml new file mode 100644 index 0000000..589d7fa --- /dev/null +++ b/scriptshifter/tables/data/arabic 2025-12-01.yml @@ -0,0 +1,327 @@ +# Arabic S2R using the 3rd-party ArabicTransliterator library: +# https://github.com/MTG/ArabicTransliterator + +--- +general: + name: Arabic + parents: + - _ignore_base + description: > + Version 1.0 (2025-11-29) - Arabic language R2S using a conversion table; S2R using a 3rd party library. 
+ case_sensitive: false + +roman_to_script: + map: + + # Punctuation marks: + "*": "\u066D" + ",": "\u060C" + ";": "\u061B" + "?": "\u061F" + + # Exceptions for specific words + + # Allah + "%alla\u0304h%": "\uFDF2" + "alla\u0304h": "\u0627\u0644\u0644\u0647" + + # Qur'an + "qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646" + + # lillah + "lilla\u0304h": "\u0644\u0644\u0647" + + # billah + "billa\u0304h": "\u0628\u0644\u0644\u0647" + + # Rahman + "rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646" + + # Ruwat + "ruwa\u0304t": "\u0631\u0648\u0627\u0629" + + # Hadha + "ha\u0304dha\u0304": "\u0647\u0630\u0627" + + # Hadhihi + "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" + + # dhalika + "dha\u0304lika": "\u0630\u0644\u0643" + + # Ibn when it appears in the middle of a name sequence + "ibn": "\u0628\u0646" + + # H[dot below]aya[macron]t + "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" + + # "sh[dot below] as in "Ishaq" + "sh\u0323": "\u0633\u062D" + + # "s[prime]h" combos + "s\u02B9h": "\u0633\u0647" + + # "th[dot below]" + "th\u0323": "\u062A\u062D" + + # dh[dot under] + "dh\u0323": "\u062F\u062D" + + # La-hu + "la-hu": "\u0644\u0647" + + # Mi'ah + "mi\u02BEah": "\u0645\u0627\u0626\u0629" + "mi\u02BCah": "\u0645\u0627\u0626\u0629" + + # Mi'at + "mi\u02BEat": "\u0645\u0627\u0626\u0629" + "mi\u02BCat": "\u0645\u0627\u0626\u0629" + + # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu + # will technically use \u06F0-06F9. This needs further discussion with PSD + # as RLIN21 used Hindi numbers, Connexion and Voyager does not.) 
+ + # Edition statements with Latin number + "al-t\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1" + "al-t\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2" + "al-t\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3" + "al-t\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4" + "al-t\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5" + "al-t\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6" + "al-t\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7" + "al-t\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8" + "al-t\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9" + + # Use Basic Arabic-Indic \u0660-0669 + "0": "\u0660" + "1": "\u0661" + "2": "\u0662" + "3": "\u0663" + "4": "\u0664" + "5": "\u0665" + "6": "\u0666" + "7": "\u0667" + "8": "\u0668" + "9": "\u0669" + + # Hyphenated prefixes: + "wa-": "\u0648" + "bi-": "\u0628" + "al-": "\u0627\u0644" + "lil-": "\u0644\u0644" + "li-": "\u0644" + "la\u0304-": "\u0644" + "fi\u0304-": "\u0641\u064A" + "ka-": "\u0643" + + # Vowels and vowel/consonant combinations - ta-marbutah at end of word + "ah%": "\u0629" + "at%": "\u0629" + + # tanwin at end of word + "an%": "\u0627" + + # ayn-alif combo + "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621" + "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621" + + "\u02BBa\u0304": "\u0639\u0627" + + "\u02BBi\u0304y": "\u0639\u064A" + "\u02BBi\u0304": "\u0639\u064A" + + "\u02BBu\u0304": "\u0639\u0648" + "\u02BBu": "\u0639" + + "%\u02BBa": "\u0639" + # "\u02BBa%": "\u0639" + + # alif and hamzas for all occasions + + # truncation necessary? It seems to work fine with. 
+ + "i\u0304\u02BEah%": "\u064A\u0626\u0629" + "i\u0304\u02BCah%": "\u064A\u0626\u0629" + + "i\u0304\u02BEat%": "\u064A\u0626\u0629" + "i\u0304\u02BCat%": "\u064A\u0626\u0629" + + "i\u02BEa\u0304%": "\u0626\u0627" + "i\u02BCa\u0304%": "\u0626\u0627" + + "i\u02BE": "\u0626%" + "i\u02BC": "\u0626%" + "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627" + "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627" + + "a\u02BE": "\u0623" + "a\u02BC": "\u0623" + "\u02BEi": "\u0626" + "\u02BCi": "\u0626" + "\u02BEa\u0304": "\u0622" + "\u02BCa\u0304": "\u0622" + "\u02BEa": "\u0623" + "\u02BCa": "\u0623" + + "y\u02BCah": "\u064A\u0626\u0629" + "y\u02BEah": "\u064A\u0626\u0629" + + "y\u02BCat": "\u064A\u0626\u0629" + "y\u02BEat": "\u064A\u0626\u0629" + + # A + + "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A" + "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A" + + "a\u0304\u02BCi": "\u0627\u0626" + "a\u0304\u02BEi": "\u0627\u0626" + "a\u0304\u02BC": "\u0627\u0621" + "a\u0304\u02BE": "\u0627\u0621" + "%a\u0304": "\u0622" + "a\u0304": "\u0627" + + # These next two lines were intended to convert to alif-ayn when it is at + # # the beginning of a word, definite or indefinine (i.e. + # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l" + "%a\u02BB": "\u0623\u0639" + "a\u02BB": "\u0639" + "a\u0301": "\u0649" + + "ayy": "\u064A" + "%a": "\u0623" + "a": "" + + # I - Capital I at beginning of word is usually alif hamzah-below. 
+ + "%i\u0304": "\u064A" + "i\u0304y": "\u064A" + "iy": "\u064A" + "i\u0304": "\u064A" + "%\u02BBi": "\u0639" + + # "i\u02BB": "\u0625\u0639" + + "i\u02BE": "\u0626" + "i\u02BC": "\u0627\u0626" + + "%i": "\u0625" + "i": "" + + # U + + "u\u0304\u02BE": "\u0624" + "u\u0304\u02BC": "\u0624" + "%u\u0304w": "\u0623\u0648" + "%u\u0304": "\u0623\u0648" + "u\u0304w": "\u0648" + "u\u0304": "\u0648" + "u\u02BE": "\u0624" + "u\u02BC": "\u0624" + + "%u": "\u0623" + "u": "" + + # Consonants, with tashdid added + + "bb": "\u0628" + "b": "\u0628" + "thth": "\u062B" + "th": "\u062B" + "t\u0323t\u0323": "\u0637" + "t\u0323": "\u0637" + "tt": "\u062A" + "t": "\u062A" + "J": "\u062C" + "jj": "\u062C" + "j": "\u062C" + "h\u0323h\u0323": "\u062D" + "h\u0323": "\u062D" + "hh": "\u0647" + "h": "\u0647" + "Kh": "\u062E" + "khkh": "\u062E" + "kh": "\u062E" + "kk": "\u0643" + "k": "\u0643" + "dhdh": "\u0630" + "dh": "\u0630" + "d\u0323d\u0323": "\u0636" + "d\u0323": "\u0636" + "dd": "\u062F" + "d": "\u062F" + "rr": "\u0631" + "r": "\u0631" + "z\u0323z\u0323": "\u0638" + "z\u0323": "\u0638" + "zz": "\u0632" + "z": "\u0632" + "shsh": "\u0634" + "sh": "\u0634" + "s\u0323s\u0323": "\u0635" + "s\u0323": "\u0635" + "ss": "\u0633" + "s": "\u0633" + "ghgh": "\u063A" + "gh": "\u063A" + "ff": "\u0641" + "f": "\u0641" + "qq": "\u0642" + "q": "\u0642" + "ll": "\u0644" + "l": "\u0644" + "mm": "\u0645" + "m": "\u0645" + "nn": "\u0646" + "n": "\u0646" + "ww": "\u0648" + "w": "\u0648" + "yy": "\u064A" + "y": "\u064A" + + # non-Arabic consonants: + "p": "\u067E" + "ch": "\u0686" + "v": "\u06A4" + "g": "\u06AF" + + # Diacritic characters: + # ain (\u0639) - not transliterated alone: + "\u02BB": "\u0639" + # hamza - not romanized + # "\u0621" + # hamza (alone in final position) + "\u02BE%": "\u0621" + "\u02BC%": "\u0621" + + # Do not know what, if anything, is needed here: + # tatweel: + # "\u0640" + # fathatan: + # "\u064B" + # dammatan: + # "\u064C" + # kasratan: + # "\u064D" + # fatha: + # "\u064E" + # 
damma: + # "\u064F" + # kasra: + # "\u0650" + # shadda: + # "\u0651" + # sukun: + # "\u0652" + # superscript alef: + # "\u0670" + # alef wasla + # "\u0671" + + +script_to_roman: + hooks: + post_config: + - + - arabic.arabic_romanizer.s2r_post_config diff --git a/scriptshifter/tables/data/arabic.yml b/scriptshifter/tables/data/arabic.yml index 9755e7b..91e57b9 100644 --- a/scriptshifter/tables/data/arabic.yml +++ b/scriptshifter/tables/data/arabic.yml @@ -6,8 +6,8 @@ general: name: Arabic case_sensitive: false description: Arabic language R2S using a conversion table; S2R using a 3rd party library. - version: 1.1.0 - date: 2025-12-02 + version: 1.0.2 + date: 2026-01-28 parents: - _ignore_base @@ -20,84 +20,89 @@ general: # double prime (spacing) \u02BA # modifier letter turned comma (ayn - spacing) \u02BB # modifier letter apostrophe (alif - spacing) \u02BC - +# zero width non-joiner \u200C roman_to_script: map: - # Punctuation marks: + # ARABIC PUNCTUATION MARKS: "*": "\u066D" ",": "\u060C" ";": "\u061B" "?": "\u061F" - # Exceptions for specific words - - # Allah - "%alla\u0304h%": "\uFDF2" + # SPECIAL RULES FOR CERTAIN WORDS + + # ALLA[macron]H WITH SHADDAH AND ALIF KHUNJARIYAH + "%alla\u0304h%": "\u0627\u0644\u0644\u0651\u0670\u0647" + + # ALLA[macron]H (4-CHARACTER ENCODING) "alla\u0304h": "\u0627\u0644\u0644\u0647" - # Qur'an + # QUR'AN "qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646" - # lillah + # LILLAH "lilla\u0304h": "\u0644\u0644\u0647" - # billah + # BILLAH "billa\u0304h": "\u0628\u0644\u0644\u0647" - # Rahman + # RAHMAN "rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646" - # Ruwat + # RUWAT "ruwa\u0304t": "\u0631\u0648\u0627\u0629" - # Hadha + # HADHA "ha\u0304dha\u0304": "\u0647\u0630\u0627" - # Hadhihi + # HADHIHI "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" - # dhalika + # DHALIKA "dha\u0304lika": "\u0630\u0644\u0643" + + # SPECIAL CONVERSION FOR NAMES + "%t\u0323a\u0304ha\u0304%": "\u0637\u0647" + "%ya\u0304si\u0304n%": "\u064A\u0633\u0646" + "%\u02BBamr%": 
"\u0639\u0645\u0631\u0648" + "%bahjat%": "\u0628\u0647\u062C\u0629" + + # BIN WHEN IT APPEARS ISOLATED + "%bin%": "\u0628\u0646" + + # IBN WHEN IT APPEARS ISOLATED + "%ibn%": "\u0627\u0628\u0646" - # Ibn when it appears in the middle of a name sequence - "ibn": "\u0628\u0646" - - # H[dot below]aya[macron]t + # H[dot below]AYA[macron]T "%h\u0323aya\u0304t%": "\u062D\u064A\u0627\u0629" - # H[dot below]aya[macron]ti[macron] + # H[dot below]AYA[macron]TI[macron] # "h\u0323aya\u0304ti\u0304": "\u062D\u064A\u0627\u062A\u064A" - # "sh[dot below] as in "Ishaq" + # "SH[dot below] AS IN "ISHAQ" "sh\u0323": "\u0633\u062D" - # "s[prime]h" combos - "s\u02B9h": "\u0633\u0647" - - # "th[dot below]" + # TH[dot below] "th\u0323": "\u062A\u062D" - # dh[dot under] + # DH[dot below] "dh\u0323": "\u062F\u062D" - # La-hu + # LA-HU "la-hu": "\u0644\u0647" - # Mi'ah + # MI'AH "mi\u02BEah": "\u0645\u0627\u0626\u0629" "mi\u02BCah": "\u0645\u0627\u0626\u0629" - # Mi'at + # MI'AT "mi\u02BEat": "\u0645\u0627\u0626\u0629" "mi\u02BCat": "\u0645\u0627\u0626\u0629" - # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu - # will technically use \u06F0-06F9. This needs further discussion with PSD - # as RLIN21 used Hindi numbers, Connexion and Voyager does not.) 
- - # Edition statements with Latin number + # EDITION NUMBERS (INDIC-ARABIC NUMERALS PREFERRED IN ARABIC + # PERSIAN AND URDU PREFER TRUE ARABIC NUMERALS \u06F0-06F9 "al-t\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1" "al-t\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2" "al-t\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3" @@ -108,7 +113,7 @@ roman_to_script: "al-t\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8" "al-t\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9" - # Use Basic Arabic-Indic \u0660-0669 + # OTHER NUMERALS \u0660-0669 "0": "\u0660" "1": "\u0661" "2": "\u0662" @@ -120,7 +125,7 @@ roman_to_script: "8": "\u0668" "9": "\u0669" - # Hyphenated prefixes: + # HYPHENATED PREFIXES: "wa-": "\u0648" "bi-": "\u0628" "al-": "\u0627\u0644" @@ -130,14 +135,14 @@ roman_to_script: "fi\u0304-": "\u0641\u064A" "ka-": "\u0643" - # Vowels and vowel/consonant combinations - ta-marbutah at end of word + # TA-MARBUTAH AT END OF WORD "ah%": "\u0629" "at%": "\u0629" - # tanwin at end of word + # TANWIN AT END OF WORD "an%": "\u0627" - # ayn-alif combo + # AYN-ALIF COMBOS AT END OF WORD "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621" "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621" @@ -152,10 +157,7 @@ roman_to_script: "%\u02BBa": "\u0639" # "\u02BBa%": "\u0639" - # alif and hamzas for all occasions - - # truncation necessary? It seems to work fine with. 
- + # ALIF AND HAMZA AT END "i\u0304\u02BEah%": "\u064A\u0626\u0629" "i\u0304\u02BCah%": "\u064A\u0626\u0629" @@ -165,8 +167,8 @@ roman_to_script: "i\u02BEa\u0304%": "\u0626\u0627" "i\u02BCa\u0304%": "\u0626\u0627" - "i\u02BE": "\u0626%" - "i\u02BC": "\u0626%" + "i\u02BE%": "\u0626" + "i\u02BC%": "\u0626" "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627" "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627" @@ -186,7 +188,6 @@ roman_to_script: "y\u02BEat": "\u064A\u0626\u0629" # A - "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A" "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A" @@ -197,9 +198,7 @@ roman_to_script: "%a\u0304": "\u0622" "a\u0304": "\u0627" - # These next two lines were intended to convert to alif-ayn when it is at - # # the beginning of a word, definite or indefinine (i.e. - # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l" + # CONVERSION OF INITIAL A+AYN (i.e., "al-a[ayn]ma[macron]l" "%a\u02BB": "\u0623\u0639" "a\u02BB": "\u0639" "a\u0301": "\u0649" @@ -208,8 +207,6 @@ roman_to_script: "%a": "\u0623" "a": "" - # I - Capital I at beginning of word is usually alif hamzah-below. 
- "%i\u0304": "\u064A" "i\u0304y": "\u064A" "iy": "\u064A" "i\u0304": "\u064A" @@ -217,7 +214,6 @@ roman_to_script: "%\u02BBi": "\u0639" # "i\u02BB": "\u0625\u0639" - "i\u02BE": "\u0626" "i\u02BC": "\u0627\u0626" @@ -225,7 +221,6 @@ roman_to_script: "i": "" # U - "u\u0304\u02BE": "\u0624" "u\u0304\u02BC": "\u0624" "%u\u0304w": "\u0623\u0648" @@ -234,15 +229,14 @@ roman_to_script: "u\u0304": "\u0648" "u\u02BE": "\u0624" "u\u02BC": "\u0624" - "%u": "\u0623" "u": "" - # Consonants, with tashdid added - + # CONSONANTS WITH TASHDID/SHADDAH (GEMINATION MARK) ARE DOUBLED "bb": "\u0628" "b": "\u0628" "thth": "\u062B" + "t\u02B9h": "\u062B\u0647" "th": "\u062B" "t\u0323t\u0323": "\u0637" "t\u0323": "\u0637" @@ -255,12 +249,13 @@ roman_to_script: "h\u0323": "\u062D" "hh": "\u0647" "h": "\u0647" - "Kh": "\u062E" "khkh": "\u062E" + "k\u02B9h": "\u0643\u0647" "kh": "\u062E" "kk": "\u0643" "k": "\u0643" "dhdh": "\u0630" + "d\u02B9h": "\u062F\u0647" "dh": "\u0630" "d\u0323d\u0323": "\u0636" "d\u0323": "\u0636" @@ -273,6 +268,7 @@ roman_to_script: "zz": "\u0632" "z": "\u0632" "shsh": "\u0634" + "s\u02B9h": "\u0633\u0647" "sh": "\u0634" "s\u0323s\u0323": "\u0635" "s\u0323": "\u0635" @@ -294,23 +290,22 @@ roman_to_script: "w": "\u0648" "yy": "\u064A" "y": "\u064A" + "\u0289": "\u200C" - # non-Arabic consonants: + # NON-ARABIC CONSONANTS: "p": "\u067E" "ch": "\u0686" "v": "\u06A4" "g": "\u06AF" - # Diacritic characters: - # ain (\u0639) - not transliterated alone: + # SPECIAL CHARACTERS: + # AYN (\u0639) NEVER TRANSLITERATED WHEN ALONE: "\u02BB": "\u0639" - # hamza - not romanized - # "\u0621" - # hamza (alone in final position) + # HAMZA ALONE IN FINAL POSITION "\u02BE%": "\u0621" "\u02BC%": "\u0621" - # Do not know what, if anything, is needed here: + # ARABIC VOWEL MARKS NOT USUALLY GIVEN # tatweel: # "\u0640" # fathatan: @@ -334,6 +329,8 @@ roman_to_script: # alef wasla # "\u0671" + # PRIME USED TO FORCE FINAL LETTER SHAPE OF PRECEDING CONSONANT + "\u02B9": "\u200C" script_to_roman: hooks: diff 
--git a/scriptshifter/tables/data/bodo_bengali.yml b/scriptshifter/tables/data/bodo_bengali.yml new file mode 100644 index 0000000..7768d2b --- /dev/null +++ b/scriptshifter/tables/data/bodo_bengali.yml @@ -0,0 +1,19 @@ +--- +general: + name: Bodo (Bengali) + case_sensitive: false + description: Bodo (Boro) language in the Bengali script. + version: 1.0.0 + date: 2026-01-08 + parents: + - _bengali_base + +roman_to_script: + map: + " ": " " + # NO ADDITIONAL MAPPINGS TO APPLY + +script_to_roman: + map: + " ": " " + # NO ADDITIONAL MAPPINGS TO APPLY diff --git a/scriptshifter/tables/data/bodo_devanagari.yml b/scriptshifter/tables/data/bodo_devanagari.yml new file mode 100644 index 0000000..8831ecd --- /dev/null +++ b/scriptshifter/tables/data/bodo_devanagari.yml @@ -0,0 +1,19 @@ +--- +general: + name: Bodo (Devanagari) + case_sensitive: false + description: Bodo (Boro) language in the Devanagari script. + version: 1.0.0 + date: 2026-01-08 + parents: + - devanagari_generic + +roman_to_script: + map: + " ": " " + # NO ADDITIONAL MAPPINGS TO APPLY + +script_to_roman: + map: + " ": " " + # NO ADDITIONAL MAPPINGS TO APPLY diff --git a/scriptshifter/tables/data/chuvash_cyrillic.yml b/scriptshifter/tables/data/chuvash_cyrillic.yml index c715ffa..06eb84c 100644 --- a/scriptshifter/tables/data/chuvash_cyrillic.yml +++ b/scriptshifter/tables/data/chuvash_cyrillic.yml @@ -2,20 +2,20 @@ general: name: Chuvash (Cyrillic) description: Chuvash language in Cyrillic script. 
- version: 1.0.0 - date: 2025-12-01 + version: 1.0.1 + date: 2026-01-29 parents: - cyrillic_generic roman_to_script: map: - "A\u0306": "\u0400" - "a\u0306": "\u0401" + "A\u0306": "\u04D0" + "a\u0306": "\u04D1" "CH": "\u040B" "Ch": "\u040B" "ch": "\u045B" - "E\u0306": "\u04D8" - "e\u0306": "\u04D9" + "E\u0306": "\u04D6" + "e\u0306": "\u04D7" "L\u0301": "\u0520" "l\u0301": "\u0521" "N\u0301": "\u0522" @@ -31,19 +31,19 @@ script_to_roman: map: "\u0422\u030C": "T\u0301" "\u0442\u030C": "t\u0301" - "\u0400": "A\u0306" - "\u0401": "a\u0306" - "\u0409": "L\u0301" + "\u04D0": "A\u0306" + "\u04D1": "a\u0306" "\u040B": "Ch" - "\u0459": "l\u0301" "\u045B": "ch" "\u04AA": "S\u0301" "\u04AB": "s\u0301" - "\u04AE": "U\u0307" - "\u04AF": "u\u0307" - "\u04D8": "E\u0306" - "\u04D9": "e\u0306" + "\u04F0": "U\u0307" + "\u04F1": "u\u0307" + "\u04D6": "E\u0306" + "\u04D7": "e\u0306" "\u04F2": "U\u0307" "\u04F3": "u\u0307" + "\u0520": "L\u0301" + "\u0521": "l\u0301" "\u0522": "N\u0301" "\u0523": "n\u0301" diff --git a/scriptshifter/tables/data/coptic.yml b/scriptshifter/tables/data/coptic.yml new file mode 100644 index 0000000..5383c02 --- /dev/null +++ b/scriptshifter/tables/data/coptic.yml @@ -0,0 +1,341 @@ +--- +general: + name: Coptic + description: Coptic language in the Coptic script. 
+ version: 1.0.0 + date: 2026-01-22 + parents: + - _ignore_base + +# COMMON COMBINING CHARACTERS (always follow a base letter): +# combining macron \u0304 +# combining overline \u0305 used with Coptic numbers +# combining breve \u0306 +# combining diaeresis \u0308 +# combining caron (hachek) \u030C +# combining dot below \u0323 +# combining dieresis below \u0324 +# combining cedilla \u0327 (French, Turkish, Azeri) +# combining low line \u0332 +# combining double low line \u0333 +# combining double overline \u033F used with Coptic numbers +# apostrophe (spacing) \u0027 +# soft sign/prime (spacing) \u02B9 + +roman_to_script: + map: + + "A": "\u2C80" + "a": "\u2C81" + "B": "\u2C82" + "b": "\u2C83" + "CH": "\u2CAC" + "Ch": "\u2CAC" + "ch": "\u2CAD" + "C\u0327\u0308": "\u2CC2" + "C\u0308\u0327": "\u2CC2" + "c\u0327\u0308": "\u2CC3" + "c\u0308\u0327": "\u2CC3" + "C\u0327": "\u2CCA" + "c\u0327": "\u2CCB" + "C\u030C": "\u2CEA" + "c\u030C": "\u2CEB" + "D": "\u2C86" + "d": "\u2C87" + "E\u0304": "\u2C8E" + "e\u0304": "\u2C8F" + "E": "\u2C88" + "e": "\u2C89" + "F": "\u03E4" + "f": "\u03E5" + "G": "\u2C84" + "g": "\u2C85" + "H\u0323": "\u03E6" + "h\u0323": "\u03E7" + "TI": "\u03EE" + "Ti": "\u03EE" + "ti": "\u03EF" + "I\u0308": "\u03AA" + "i\u0308": "\u03CA" + "I": "\u2C92" + "i": "\u2C93" + "KY": "\u2CD2" + "Ky": "\u2CD2" + "ky": "\u2CD3" + "K\u0324": "\u2CB8" + "k\u0324": "\u2CB9" + "K": "\u2C94" + "k": "\u2C95" + "L": "\u2C96" + "l": "\u2C97" + "M": "\u2C98" + "m": "\u2C99" + "N\u0308": "\u2CBA" + "n\u0308": "\u2CBB" + "N\u0332": "\u2CE0" + "n\u0332": "\u2CE1" + "N\u0333": "\u2CDE" + "n\u0333": "\u2CDF" + "\u019D": "\u2CE0" + "\u0272": "\u2CE1" + "\u014A": "\u2CDE" + "\u014B": "\u2CDF" + "N": "\u2C9A" + "n": "\u2C9B" + "O\u0304": "\u2CB0" + "o\u0304": "\u2CB1" + "O": "\u2C9E" + "o": "\u2C9F" + "P\u02B9H": "\u2CA0\u2CC8" + "P\u02B9h": "\u2CA0\u2CC8" + "p\u02B9h": "\u2CA1\u2CC9" + "PH": "\u2CAA" + "Ph": "\u2CAA" + "ph": "\u2CAB" + "P\u02B9S": "\u2CA0\u2CA4" + "P\u02B9s": 
"\u2CA0\u2CA5" + "p\u02B9s": "\u2CA1\u2CA5" + "PS": "\u2CAE" + "Ps": "\u2CAE" + "ps": "\u2CAF" + "P": "\u2CA0" + "p": "\u2CA1" + "R": "\u2CA2" + "r": "\u2CA3" + "S\u030C": "\u03E2" + "s\u030C": "\u03E3" + "S": "\u2CA4" + "s": "\u2CA5" + "T\u02B9H": "\u2CA6\u2CC8" + "T\u02B9h": "\u2CA6\u2CC8" + "t\u02B9h": "\u2CA7\u2CC9" + "TH": "\u2C90" + "Th": "\u2C90" + "th": "\u2C91" + "H": "\u03E8" + "h": "\u03E9" + "T": "\u2CA6" + "t": "\u2CA7" + "U\u0308": "\u03AB" + "u\u0308": "\u03CB" + "U": "\u2CA8" + "u": "\u2CA9" + # CONSONANT "V/v" SHOULD NOT OCCUR + "V": "\u2CE2" + "v": "\u2CE3" + "W": "\u2CE2" + "w": "\u2CE3" + "X\u0308": "\u2CC8" + "x\u0308": "\u2CC9" + "X": "\u2C9C" + "x": "\u2C9D" + "Z": "\u2C8C" + "z": "\u2C8D" + "\u0027\u0333": "\u2CB2" + "\u0027": "\u2CB3" + + # COPTIC NUMBERS + "1000": "\u2C81\u033F" + "100": "\u2CA3\u0305" + "10": "\u2C93\u0305" + "1": "\u2C81\u0305" + "2000": "\u2C83\u033F" + "200": "\u2CA5\u0305" + "20": "\u2C95\u0305" + "2": "\u2C83\u0305" + "3000": "\u2C85\u033F" + "300": "\u2CA7\u0305" + "30": "\u2C97\u0305" + "3": "\u2C85\u0305" + "4000": "\u2C87\u033F" + "400": "\u2CA9\u0305" + "40": "\u2C99\u0305" + "4": "\u2C87\u0305" + "5000": "\u2C89\u033F" + "500": "\u2CAB\u0305" + "50": "\u2C9B\u0305" + "5": "\u2C89\u0305" + "6000": "\u2C8B\u033F" + "600": "\u2CAD\u0305" + "60": "\u2C9D\u0305" + "6": "\u2C8B\u0305" + "7000": "\u2C8D\u033F" + "700": "\u2CAF\u0305" + "70": "\u2C9F\u0305" + "7": "\u2C8D\u0305" + "8000": "\u2C8F\u033F" + "800": "\u2CB1\u0305" + "80": "\u2CA1\u0305" + "8": "\u2C8F\u0305" + "9000": "\u2C91\u033F" + "900": "\u2CC1\u0305" + "90": "\u03E5\u0305" + "9": "\u2C91\u0305" + + # COPTIC PUNCTUATION + "\u002E": "\u2CFE" + "\u003F": "\u2CFA" + "\u00BF": "\u2CFB" + "\u002F\u002F": "\u2CFC" + "\u005C\u005C": "\u2CF9" + "\u0060": "\u2CFF" + "\u0026": "\u2CE4" + +script_to_roman: + map: + + "\u2C80": "A" + "\u2C81": "a" + "\u2C82": "B" + "\u2C83": "b" + "\u2CAC": "CH" + "\u2CAC": "Ch" + "\u2CAD": "ch" + "\u2CC2": "C\u0327\u0308" + 
"\u2CC2": "C\u0308\u0327" + "\u2CC3": "c\u0327\u0308" + "\u2CC3": "c\u0308\u0327" + "\u2CCA": "C\u0327" + "\u2CCB": "c\u0327" + "\u2CEA": "C\u030C" + "\u2CEB": "c\u030C" + "\u2C86": "D" + "\u2C87": "d" + "\u2C8E": "E\u0304" + "\u2C8F": "e\u0304" + "\u2C88": "E" + "\u2C89": "e" + "\u03E4": "F" + "\u03E5": "f" + "\u2C84": "G" + "\u2C85": "g" + "\u03E6": "H\u0323" + "\u03E7": "h\u0323" + "\u03EE": "TI" + "\u03EE": "Ti" + "\u03EF": "ti" + "\u03AA": "I\u0308" + "\u03CA": "i\u0308" + "\u2C92": "I" + "\u2C93": "i" + "\u2CD2": "KY" + "\u2CD2": "Ky" + "\u2CD3": "ky" + "\u2CB8": "K\u0324" + "\u2CB9": "k\u0324" + "\u2C94": "K" + "\u2C95": "k" + "\u2C96": "L" + "\u2C97": "l" + "\u2C98": "M" + "\u2C99": "m" + "\u2CBA": "N\u0308" + "\u2CBB": "n\u0308" + "\u2CE0": "N\u0332" + "\u2CE1": "n\u0332" + "\u2CDE": "N\u0333" + "\u2CDF": "n\u0333" + "\u2CE0": "\u019D" + "\u2CE1": "\u0272" + "\u2CDE": "\u014A" + "\u2CDF": "\u014B" + "\u2C9A": "N" + "\u2C9B": "n" + "\u2CB0": "O\u0304" + "\u2CB1": "o\u0304" + "\u2C9E": "O" + "\u2C9F": "o" + "\u2CA0\u2CC8": "P\u02B9H" + "\u2CA0\u2CC8": "P\u02B9h" + "\u2CA1\u2CC9": "p\u02B9h" + "\u2CAA": "PH" + "\u2CAA": "Ph" + "\u2CAB": "ph" + "\u2CA0\u2CA4": "P\u02B9S" + "\u2CA0\u2CA5": "P\u02B9s" + "\u2CA1\u2CA5": "p\u02B9s" + "\u2CAE": "PS" + "\u2CAE": "Ps" + "\u2CAF": "ps" + "\u2CA0": "P" + "\u2CA1": "p" + "\u2CA2": "R" + "\u2CA3": "r" + "\u03E2": "S\u030C" + "\u03E3": "s\u030C" + "\u2CA4": "S" + "\u2CA5": "s" + "\u2CA6\u2CC8": "T\u02B9H" + "\u2CA6\u2CC8": "T\u02B9h" + "\u2CA7\u2CC9": "t\u02B9h" + "\u2C90": "TH" + "\u2C90": "Th" + "\u2C91": "th" + "\u03E8": "H" + "\u03E9": "h" + "\u2CA6": "T" + "\u2CA7": "t" + "\u03AB": "U\u0308" + "\u03CB": "u\u0308" + "\u2CA8": "U" + "\u2CA9": "u" + # CONSONANT "V/v" SHOULD NOT OCCUR + "\u2CE2": "V" + "\u2CE3": "v" + "\u2CE2": "W" + "\u2CE3": "w" + "\u2CC8": "X\u0308" + "\u2CC9": "x\u0308" + "\u2C9C": "X" + "\u2C9D": "x" + "\u2C8C": "Z" + "\u2C8D": "z" + "\u2CB2": "\u0027\u0333" + "\u2CB3": "\u0027" + + # COPTIC NUMBERS 
+ "\u2C81\u033F": "1000" + "\u2CA3\u0305": "100" + "\u2C93\u0305": "10" + "\u2C81\u0305": "1" + "\u2C83\u033F": "2000" + "\u2CA5\u0305": "200" + "\u2C95\u0305": "20" + "\u2C83\u0305": "2" + "\u2C85\u033F": "3000" + "\u2CA7\u0305": "300" + "\u2C97\u0305": "30" + "\u2C85\u0305": "3" + "\u2C87\u033F": "4000" + "\u2CA9\u0305": "400" + "\u2C99\u0305": "40" + "\u2C87\u0305": "4" + "\u2C89\u033F": "5000" + "\u2CAB\u0305": "500" + "\u2C9B\u0305": "50" + "\u2C89\u0305": "5" + "\u2C8B\u033F": "6000" + "\u2CAD\u0305": "600" + "\u2C9D\u0305": "60" + "\u2C8B\u0305": "6" + "\u2C8D\u033F": "7000" + "\u2CAF\u0305": "700" + "\u2C9F\u0305": "70" + "\u2C8D\u0305": "7" + "\u2C8F\u033F": "8000" + "\u2CB1\u0305": "800" + "\u2CA1\u0305": "80" + "\u2C8F\u0305": "8" + "\u2C91\u033F": "9000" + "\u2CC1\u0305": "900" + "\u03E5\u0305": "90" + "\u2C91\u0305": "9" + + # COPTIC PUNCTUATION + "\u2CFE": "\u002E" + "\u2CFA": "\u003F" + "\u2CFB": "\u00BF" + "\u2CFC": "\u002F\u002F" + "\u2CF9": "\u005C\u005C" + "\u2CFF": "\u0060" + "\u2CE4": "\u0026" diff --git a/scriptshifter/tables/data/glagolitic.yml b/scriptshifter/tables/data/glagolitic.yml new file mode 100644 index 0000000..2fd02ad --- /dev/null +++ b/scriptshifter/tables/data/glagolitic.yml @@ -0,0 +1,221 @@ +--- +general: + name: Glagolitic + description: Glagolitic script, used mostly for Church Slavonic language. 
+ version: 1.0.0 + date: 2026-01-21 + parents: + - _ignore_base + +roman_to_script: + map: + + "A\u0323": "\u2C2D" + "a\u0323": "\u2C5D" + "A": "\u2C00" + "a": "\u2C30" + "B": "\u2C01" + "b": "\u2C31" + "C\u0307H": "\u2C2F" + "C\u0307h": "\u2C2F" + "c\u0307h": "\u2C5F" + "CH": "\u2C1D" + "Ch": "\u2C1D" + "ch": "\u2C4D" + "D": "\u2C04" + "d": "\u2C34" + "E\u0328\u0307": "\u2C25" + "e\u0328\u0307": "\u2C55" + "E\u030C": "\u2C26" + "e\u030C": "\u2C56" + "E\u0328": "\u2C24" + "e\u0328": "\u2C54" + "E": "\u2C05" + "e": "\u2C35" + "F\u0307": "\u2C2A" + "f\u0307": "\u2C5A" + "F": "\u2C17" + "f": "\u2C47" + "G\u0301": "\u2C0C" + "g\u0301": "\u2C3C" + "G": "\u2C03" + "g": "\u2C33" + "I\u0361A": "\u2C21" + "I\u0361a": "\u2C21" + "i\u0361a": "\u2C51" + "I\u0361E\u0328": "\u2C27" + "I\u0361e\u0328": "\u2C27" + "i\u0361e\u0328": "\u2C57" + "I\u0361O\u0328": "\u2C29" + "I\u0361o\u0328": "\u2C29" + "i\u0361o\u0328": "\u2C59" + "I\u0361U\u0328": "\u2C23" + "I\u0361u\u0328": "\u2C23" + "i\u0361u\u0328": "\u2C53" + "I\u0308": "\u2C0B" + "i\u0308": "\u2C3B" + "I": "\u2C09" + "i": "\u2C39" + "K\u0307H": "\u2C22" + "K\u0307h": "\u2C22" + "k\u0307h": "\u2C52" + "KH": "\u2C18" + "Kh": "\u2C18" + "kh": "\u2C48" + "K": "\u2C0D" + "k": "\u2C3D" + "L": "\u2C0E" + "l": "\u2C3E" + "M\u0307": "\u2C2E" + "m\u0307": "\u2C5E" + "M": "\u2C0F" + "m": "\u2C3F" + "N": "\u2C10" + "n": "\u2C40" + "O\u0304": "\u2C19" + "o\u0304": "\u2C49" + "O\u0328": "\u2C28" + "o\u0328": "\u2C58" + "O": "\u2C11" + "o": "\u2C41" + "P\u0307": "\u2C1A" + "p\u0307": "\u2C4A" + "P": "\u2C12" + "p": "\u2C42" + "R": "\u2C13" + "r": "\u2C43" + "SHT": "\u2C1B" + "SHt": "\u2C1B" + "Sht": "\u2C1B" + "sht": "\u2C4B" + "SH": "\u2C1E" + "Sh": "\u2C1E" + "sh": "\u2C4E" + "S": "\u2C14" + "s": "\u2C44" + "T\u0361S": "\u2C1C" + "T\u0361s": "\u2C1C" + "t\u0361s": "\u2C4C" + "T": "\u2C15" + "t": "\u2C45" + "U": "\u2C16" + "u": "\u2C46" + "V": "\u2C02" + "v": "\u2C32" + "Y\u0307": "\u2C2B" + "y\u0307": "\u2C5B" + "Y": "\u2C0A" + "y":
"\u2C3A" + "\u02B9\u0333": "\u2C20" + "\u02B9": "\u2C50" + "\u02BA\u0333\u0307": "\u2C2C" + "\u02BA\u0307": "\u2C5C" + "\u02BA\u0333": "\u2C1F" + "\u02BA": "\u2C4F" + "Z\u0307": "\u2C07" + "z\u0307": "\u2C37" + "ZH": "\u2C06" + "Zh": "\u2C06" + "zh": "\u2C36" + "Z": "\u2C08" + "z": "\u2C38" + +script_to_roman: + map: + + "\u2C00": "A" + "\u2C01": "B" + "\u2C02": "V" + "\u2C03": "G" + "\u2C04": "D" + "\u2C05": "E" + "\u2C06": "Zh" + "\u2C07": "Z\u0307" + "\u2C08": "Z" + "\u2C09": "I" + "\u2C0A": "Y" + "\u2C0B": "I\u0308" + "\u2C0C": "G\u0301" + "\u2C0D": "K" + "\u2C0E": "L" + "\u2C0F": "M" + "\u2C10": "N" + "\u2C11": "O" + "\u2C12": "P" + "\u2C13": "R" + "\u2C14": "S" + "\u2C15": "T" + "\u2C16": "U" + "\u2C17": "F" + "\u2C18": "Kh" + "\u2C19": "O\u0304" + "\u2C1A": "P\u0307" + "\u2C1B": "Sht" + "\u2C1C": "T\u0361S" + "\u2C1D": "Ch" + "\u2C1E": "Sh" + "\u2C1F": "\u02BA\u0333" + "\u2C20": "\u02B9\u0333" + "\u2C21": "I\u0361A" + "\u2C22": "K\u0307h" + "\u2C23": "I\u0361U\u0328" + "\u2C24": "E\u0328" + "\u2C25": "E\u0328\u0307" + "\u2C26": "E\u030C" + "\u2C27": "I\u0361E\u0328" + "\u2C28": "O\u0328" + "\u2C29": "I\u0361O\u0328" + "\u2C2A": "F\u0307" + "\u2C2B": "Y\u0307" + "\u2C2C": "\u02BA\u0333\u0307" + "\u2C2D": "A\u0323" + "\u2C2E": "M\u0307" + "\u2C2F": "C\u0307h" + "\u2C30": "a" + "\u2C31": "b" + "\u2C32": "v" + "\u2C33": "g" + "\u2C34": "d" + "\u2C35": "e" + "\u2C36": "zh" + "\u2C37": "z\u0307" + "\u2C38": "z" + "\u2C39": "i" + "\u2C3A": "y" + "\u2C3B": "i\u0308" + "\u2C3C": "g\u0301" + "\u2C3D": "k" + "\u2C3E": "l" + "\u2C3F": "m" + "\u2C40": "n" + "\u2C41": "o" + "\u2C42": "p" + "\u2C43": "r" + "\u2C44": "s" + "\u2C45": "t" + "\u2C46": "u" + "\u2C47": "f" + "\u2C48": "kh" + "\u2C49": "o\u0304" + "\u2C4A": "p\u0307" + "\u2C4B": "sht" + "\u2C4C": "t\u0361s" + "\u2C4D": "ch" + "\u2C4E": "sh" + "\u2C4F": "\u02BA" + "\u2C50": "\u02B9" + "\u2C51": "i\u0361a" + "\u2C52": "k\u0307h" + "\u2C53": "i\u0361u\u0328" + "\u2C54": "e\u0328" + "\u2C55": "e\u0328\u0307" +
"\u2C56": "e\u030C" + "\u2C57": "i\u0361e\u0328" + "\u2C58": "o\u0328" + "\u2C59": "i\u0361o\u0328" + "\u2C5A": "f\u0307" + "\u2C5B": "y\u0307" + "\u2C5C": "\u02BA\u0307" + "\u2C5D": "a\u0323" + "\u2C5E": "m\u0307" + "\u2C5F": "c\u0307h" diff --git a/scriptshifter/tables/data/inuktitut.yml b/scriptshifter/tables/data/inuktitut.yml new file mode 100644 index 0000000..e27e3ea --- /dev/null +++ b/scriptshifter/tables/data/inuktitut.yml @@ -0,0 +1,314 @@ +--- +general: + name: Inuktitut + case_sensitive: false + description: Inuktitut in Unified Canadian Aboriginal syllabics. + version: 1.0.0 + date: 2026-01-25 + parents: + - _ignore_base + +roman_to_script: + map: + "\u0026aai": "\u0026aai" + "\u0026aa": "\u15A5" + "\u0026a": "\u15A4" + "\u0026ii": "\u15A1" + "\u0026i": "\u15A0" + "\u0026uu": "\u15A3" + "\u0026u": "\u15A2" + "\u0026": "\u15A6" + "haa": "\u157A" + "hai": "\u1574" + "ha": "\u1579" + "hii": "\u1576" + "hi": "\u1575" + "huu": "\u1578" + "hu": "\u1577" + "h\u0323": "\u157C" + "h": "\u157B" + "jaai": "\u1527" + "jaa": "\u152E" + "jai": "\u1526" + "ja": "\u152D" + "jii": "\u1529" + "ji": "\u1528" + "juu": "\u152B" + "ju": "\u152A" + "j": "\u153E" + "kaai": "\u146C" + "kaa": "\u1473" + "kai": "\u146B" + "ka": "\u1472" + "kii": "\u146E" + "ki": "\u146D" + "kuu": "\u1470" + "ku": "\u146F" + "k": "\u1483" + "laai": "\u14D4" + "laa": "\u14DB" + "lai": "\u14D3" + "la": "\u14DA" + "lii": "\u14D6" + "li": "\u14D5" + "luu": "\u14D8" + "lu": "\u14D7" + "l": "\u14EA" + "maai": "\u14A4" + "maa": "\u14AB" + "mai": "\u14A3" + "ma": "\u14AA" + "mii": "\u14A6" + "mi": "\u14A5" + "muu": "\u14A8" + "mu": "\u14A7" + "m": "\u14BB" + "nngaa": "\u1676" + "nnga": "\u1675" + "nngii": "\u1672" + "nngi": "\u1671" + "nnguu": "\u1674" + "nngu": "\u1673" + "nng": "\u1596" + "ngaai": "\u158E" + "ngaa": "\u1594" + "ngai": "\u1670" + "nga": "\u1593" + "ngii": "\u1590" + "ngi": "\u158F" + "nguu": "\u1592" + "ngu": "\u1591" + "ng": "\u1595" + "gaai": "\u148A" + "gaa": "\u1491" + "gai": 
"\u1489" + "ga": "\u1490" + "gii": "\u148C" + "gi": "\u148B" + "guu": "\u148E" + "gu": "\u148D" + "g": "\u14A1" + "naai": "\u14C1" + "naa": "\u14C8" + "nai": "\u14C0" + "na": "\u14C7" + "nii": "\u14C3" + "ni": "\u14C2" + "nuu": "\u14C6" + "nu": "\u14C4" + "n": "\u14D0" + "paai": "\u1430" + "paa": "\u1439" + "pai": "\u142F" + "pa": "\u1438" + "pii": "\u1432" + "pi": "\u1431" + "puu": "\u1434" + "pu": "\u1433" + "p": "\u1449" + "qaai": "\u157E" + "qaa": "\u1584" + "qai": "\u166F" + "qa": "\u1583" + "qii": "\u1580" + "qi": "\u157F" + "quu": "\u1582" + "qu": "\u1581" + "q": "\u1585" + "raai": "\u1545" + "raa": "\u154C" + "rai": "\u1543" + "ra": "\u154B" + "rii": "\u1547" + "ri": "\u1546" + "ruu": "\u1549" + "ru": "\u1548" + "r": "\u1550" + "saai": "\u14EE" + "saa": "\u14F5" + "sai": "\u14ED" + "sa": "\u14F4" + "sii": "\u14F0" + "si": "\u14EF" + "suu": "\u14F2" + "su": "\u14F1" + "s": "\u1505" + "taai": "\u144D" + "taa": "\u1456" + "tai": "\u144C" + "ta": "\u1455" + "tii": "\u144F" + "ti": "\u144E" + "tuu": "\u1451" + "tu": "\u1450" + "t": "\u14BC" + "vaai": "\u1554" + "vaa": "\u155A" + "vai": "\u1553" + "va": "\u1559" + "vii": "\u1556" + "vi": "\u1555" + "vuu": "\u1558" + "vu": "\u1557" + "v": "\u155D" + "aai": "\u1402" + "aa": "\u140B" + "ai": "\u1401" + "a": "\u140A" + "ii": "\u1404" + "i": "\u1403" + "uu": "\u1406" + "u": "\u1405" + +script_to_roman: + map: + + "\u0026aai": "\u0026aai" + "\u1401": "ai" + "\u1402": "aai" + "\u1403": "i" + "\u1404": "ii" + "\u1405": "u" + "\u1406": "uu" + "\u140A": "a" + "\u140B": "aa" + "\u142F": "pai" + "\u1430": "paai" + "\u1431": "pi" + "\u1432": "pii" + "\u1433": "pu" + "\u1434": "puu" + "\u1438": "pa" + "\u1439": "paa" + "\u1449": "p" + "\u144C": "tai" + "\u144D": "taai" + "\u144E": "ti" + "\u144F": "tii" + "\u1450": "tu" + "\u1451": "tuu" + "\u1455": "ta" + "\u1456": "taa" + "\u146B": "kai" + "\u146C": "kaai" + "\u146D": "ki" + "\u146E": "kii" + "\u146F": "ku" + "\u1470": "kuu" + "\u1472": "ka" + "\u1473": "kaa" + "\u1483": "k" 
+ "\u1489": "gai" + "\u148A": "gaai" + "\u148B": "gi" + "\u148C": "gii" + "\u148D": "gu" + "\u148E": "guu" + "\u1490": "ga" + "\u1491": "gaa" + "\u14A1": "g" + "\u14A3": "mai" + "\u14A4": "maai" + "\u14A5": "mi" + "\u14A6": "mii" + "\u14A7": "mu" + "\u14A8": "muu" + "\u14AA": "ma" + "\u14AB": "maa" + "\u14BB": "m" + "\u14BC": "t" + "\u14C0": "nai" + "\u14C1": "naai" + "\u14C2": "ni" + "\u14C3": "nii" + "\u14C4": "nu" + "\u14C6": "nuu" + "\u14C7": "na" + "\u14C8": "naa" + "\u14D0": "n" + "\u14D3": "lai" + "\u14D4": "laai" + "\u14D5": "li" + "\u14D6": "lii" + "\u14D7": "lu" + "\u14D8": "luu" + "\u14DA": "la" + "\u14DB": "laa" + "\u14EA": "l" + "\u14ED": "sai" + "\u14EE": "saai" + "\u14EF": "si" + "\u14F1": "su" + "\u14F2": "suu" + "\u14F4": "sa" + "\u14F5": "saa" + "\u14F0": "sii" + "\u1505": "s" + "\u1526": "jai" + "\u1527": "jaai" + "\u152B": "juu" + "\u1528": "ji" + "\u1529": "jii" + "\u152A": "ju" + "\u152D": "ja" + "\u152E": "jaa" + "\u153E": "j" + "\u1543": "rai" + "\u1545": "raai" + "\u1546": "ri" + "\u1547": "rii" + "\u1548": "ru" + "\u1549": "ruu" + "\u154B": "ra" + "\u154C": "raa" + "\u1550": "r" + "\u1553": "vai" + "\u1554": "vaai" + "\u1555": "vi" + "\u1556": "vii" + "\u1557": "vu" + "\u1558": "vuu" + "\u1559": "va" + "\u155A": "vaa" + "\u155D": "v" + "\u1574": "hai" + "\u1575": "hi" + "\u1576": "hii" + "\u1577": "hu" + "\u1578": "huu" + "\u1579": "ha" + "\u157A": "haa" + "\u157B": "h" + "\u157C": "h\u0323" + "\u157E": "qaai" + "\u157F": "qi" + "\u1580": "qii" + "\u1581": "qu" + "\u1582": "quu" + "\u1583": "qa" + "\u1584": "qaa" + "\u1585": "q" + "\u158E": "ngaai" + "\u158F": "ngi" + "\u1590": "ngii" + "\u1591": "ngu" + "\u1592": "nguu" + "\u1593": "nga" + "\u1594": "ngaa" + "\u1595": "ng" + "\u1596": "nng" + "\u15A0": "\u0026i" + "\u15A1": "\u0026ii" + "\u15A2": "\u0026u" + "\u15A3": "\u0026uu" + "\u15A4": "\u0026a" + "\u15A5": "\u0026aa" + "\u15A6": "\u0026" + "\u166F": "qai" + "\u1670": "ngai" + "\u1672": "nngii" + "\u1673": "nngu" + "\u1671": "nngi" + 
"\u1674": "nnguu" + "\u1675": "nnga" + "\u1676": "nngaa" diff --git a/scriptshifter/tables/data/pali.yml b/scriptshifter/tables/data/pali.yml deleted file mode 100644 index ff23125..0000000 --- a/scriptshifter/tables/data/pali.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -general: - name: Pali - case_sensitive: false - -script_to_roman: - hooks: - post_config: - - - - aksharamukha.romanizer.s2r_post_config - - src_script: "Pali" - -roman_to_script: - hooks: - post_config: - - - - aksharamukha.romanizer.r2s_post_config - - dest_script: "Pali" diff --git a/scriptshifter/tables/index.yml b/scriptshifter/tables/index.yml index 3c4b0e1..3ae85e5 100644 --- a/scriptshifter/tables/index.yml +++ b/scriptshifter/tables/index.yml @@ -1,5 +1,6 @@ ---- -# Map index file. +# Map index file +# version: 1.0.3 +# date: 2026-01-29 # # Configurations not listed here will not show in the UI drop-down menu or # in the `/languages` endpoint, but can still be used in the `trans` endpoint. @@ -8,6 +9,7 @@ # key within the entry is the human-readable label that can be used in a # multiple-choice menu. +--- abazin_cyrillic: marc_code: cau name: Abazin (Cyrillic) @@ -40,9 +42,12 @@ armenian: assamese: marc_code: asm name: Assamese +avaric_cyrillic: + marc_code: ava + name: Avaric (Cyrillic) awadhi_devanagari: conf: "devanagari_generic.yml" - marc_code: bih + marc_code: awa name: Awadhi (Devanagari) azerbaijani_cyrillic: marc_code: aze @@ -65,6 +70,12 @@ bihari_devanagari: conf: "devanagari_generic.yml" marc_code: bih name: Bihari (Devanagari) +bodo_bengali: + marc_code: sit + name: Bodo (Bengali) +bodo_devanagari: + marc_code: sit + name: Bodo (Devanagari) braj_devanagari: conf: "devanagari_generic.yml" marc_code: bra @@ -87,12 +98,8 @@ chechen_cyrillic: chinese: marc_code: chi name: Chinese (Hanzi) -chinese_numerals: - marc_code: chi - name: Chinese (Arabic numerals) - description: > - Chinese transliteration that converts Chinese numerals to Arabic numerals. 
chukchi_cyrillic: + marc_code: mis name: Chukchi (Cyrillic) church_slavonic: marc_code: chu @@ -127,12 +134,12 @@ eskimo_cyrillic: ethiopic_generic: marc_code: mul name: Ethiopic (Generic) -even-evenki_cyrillic: - marc_code: tut - name: Even/Evenki (Cyrillic) eskimo-yuit_cyrillic: marc_code: ypk name: Eskimo-Yuit (Cyrillic) +even-evenki_cyrillic: + marc_code: tut + name: Even/Evenki (Cyrillic) fula_adlam: marc_code: ful name: Fula (ADLaM) @@ -146,6 +153,9 @@ gilyak_cyrillic: conf: "cyrillic_generic.yml" marc_code: mis name: Gilyak (Cyrillic) +glagolitic: + marc_code: chu + name: Glagolitic greek_classical: marc_code: grc name: Greek (Classical) @@ -167,14 +177,23 @@ hindi: name: Hindi (Devanagari) marc_code: hin ingush_cyrillic: - marc_code: inh + conf: "cyrillic_generic.yml" name: Ingush (Cyrillic) + marc_code: inh +inuit_cyrillic: + conf: "cyrillic_generic.yml" + name: Inuit (Cyrillic) + marc_code: ipk japanese_hiragana: marc_code: jpn name: Japanese (Hiragana) japanese_katakana: marc_code: jpn name: Japanese (Katakana) +kabardian_cyrillic: + conf: "cyrillic_generic.yml" + name: Kabardian (Cyrillic) + marc_code: kbd kalmyk_cyrillic: marc_code: xal name: Kalmyk (Cyrillic) @@ -185,6 +204,7 @@ kannada: marc_code: kan name: Kannada kara-kalpak_cyrillic: + conf: "cyrillic_generic.yml" marc_code: kaa name: Kara-Kalpak (Cyrillic) karachay-balkar_cyrillic: @@ -209,6 +229,7 @@ komi_cyrillic: marc_code: kom name: Komi (Cyrillic) komi-permyak_cyrillic: + conf: "cyrillic_generic.yml" marc_code: kom name: Komi-Permyak (Cyrillic) konkani_devanagari: @@ -241,34 +262,10 @@ kurdish_arabic: name: Kurdish (Arabic) kurdish_cyrillic: marc_code: kur - name: Kurdish + name: Kurdish (Cyrillic) kyrgyz_cyrillic: marc_code: kir name: Kyrgyz (Cyrillic) -ingush_cyrillic: - conf: "cyrillic_generic.yml" - name: Ingush (Cyrillic) - marc_code: inh -inuit_cyrillic: - conf: "cyrillic_generic.yml" - name: Inuit (Cyrillic) - marc_code: ipk -kabardian_cyrillic: - conf: "cyrillic_generic.yml" - name: 
Kabardian (Cyrillic) - marc_code: kbd -karakalpak_cyrillic: - conf: "cyrillic_generic.yml" - name: Karakalpak (Cyrillic) - marc_code: kaa -komi-Permyak_cyrillic: - conf: "cyrillic_generic.yml" - name: Permyak (Cyrillic) - marc_code: kom -kumyk_cyrillic: - conf: "cyrillic_generic.yml" - name: Kumyk (Cyrillic) - marc_code: kum lak_cyrillic: conf: "cyrillic_generic.yml" name: Lak (Cyrillic) @@ -281,17 +278,19 @@ lapp_cyrillic: conf: "cyrillic_generic.yml" name: Lapp (Cyrillic) marc_code: smi +lepcha: + marc_code: sit + name: Lepcha lezghian_cyrillic: conf: "cyrillic_generic.yml" marc_code: lez - name: lezghian (Cyrillic) + name: Lezghian (Cyrillic) +# limbu: +# marc_code: sit +# name: Limbu lithuanian_cyrillic: marc_code: lit name: Lithuanian (Cyrillic) -mari_cyrillic: - conf: "cyrillic_generic.yml" - name: Mari (Cyrillic) - marc_code: chm macedonian: marc_code: mac name: Macedonian @@ -317,12 +316,13 @@ mansi_cyrillic: marathi_devanagari: marc_code: mar name: Marathi (Devanagari) +mari_cyrillic: + conf: "cyrillic_generic.yml" + name: Mari (Cyrillic) + marc_code: chm moldovan_cyrillic: marc_code: mol name: Moldovan (Cyrillic) -molodstov_cyrillic: - conf: "cyrillic_generic.yml" - name: Molodstov (Cyrillic) mongolian_cyrillic: marc_code: mon name: Mongolian (Cyrillic) @@ -333,6 +333,7 @@ montenegrin_cyrillic: marc_code: cnr name: Montenegrin (Cyrillic) mordvin_cyrillic: + marc_code: fiu name: Mordvin (Cyrillic) nanai_cyrillic: conf: "cyrillic_generic.yml" @@ -356,7 +357,7 @@ nivkh_cyrillic: nogai_cyrillic: conf: "cyrillic_generic.yml" name: Nogai (Cyrillic) - marc_code: ng + marc_code: nog odia: marc_code: ori name: Odia @@ -367,9 +368,6 @@ pahari_devanagari: conf: "devanagari_generic.yml" marc_code: him name: Pahari (Devanagari) -pali: - marc_code: pli - name: Pali pali_bengali: conf: "_bengali_base.yml" marc_code: pli @@ -403,12 +401,9 @@ rajasthani_devanagari: marc_code: raj name: Rajasthani (Devanagari) romani_cyrillic: - marc_code: rom - name: Romani (Cyrillic) 
-romany_cyrillic: conf: "cyrillic_generic.yml" - name: Romany (Cyrillic) marc_code: rom + name: Romani (Cyrillic) romanian_cyrillic: conf: "cyrillic_generic.yml" name: Romanian (Cyrillic) @@ -422,6 +417,9 @@ sami_cyrillic: sanskrit_devanagari: marc_code: san name: Sanskrit (Devanagari) +# santali_ol_chiki: +# marc_code: sat +# name: Santali (Ol chiki) selkup_cyrillic: marc_code: sel name: Selkup (Cyrillic) @@ -479,6 +477,9 @@ thai: tibetan: marc_code: tib name: Tibetan +# tibetan_2015_r2r: +# marc_code: tib +# name: Tibetan (ñ,ṅ,ś,ź to ny,ng,sh,zh only) tigre_ethiopic: conf: "ethiopic_generic.yml" marc_code: tig @@ -490,10 +491,8 @@ tigrinya_ethiopic: turkmen_cyrillic: marc_code: tuk name: Turkmen (Cyrillic) -tuva_cyrillic: - conf: "cyrillic_generic.yml" - name: Tuva (Cyrillic) tuvinian_cyrillic: + conf: "cyrillic_generic.yml" marc_code: tyv name: Tuvinian (Cyrillic) udekhe_cyrillic: diff --git a/test/data/script_samples/_bengali_base.csv b/test/data/script_samples/_bengali_base.csv new file mode 100644 index 0000000..5aae90d --- /dev/null +++ b/test/data/script_samples/_bengali_base.csv @@ -0,0 +1,6 @@ +_bengali_base,কা খা গা ঘা ঙা চা ছা জা ঝা ঞা টা ঠা ডা ড়া ঢা ঢ়া ণা তা থা দা ধা না পা ফা বা ভা মা যা যা রা লা শা ষা সা হা কাঃ কাঁ ৎ ,kā khā gā ghā ṅā cā chā jā jhā ñā ṭā ṭhā ḍā ṛā ḍhā ṛhā ṇā tā thā dā dhā nā pā phā bā bhā mā yā ẏā rā lā śā shā sā hā kāḥ kām̐ t̲ +_bengali_base,আক্বা আখ্বা আগ্বা আঘ্বা আঙ্বা আচ্বা আছ্বা আজ্বা আঝ্বা আঞ্বা আট্বা আঠ্বা আড্বা আড়্বা আঢ্বা আঢ়্বা আণ্বা আত্বা আথ্বা আদ্বা আধ্বা আন্বা,ākvā ākhvā āgvā āghvā āṅvā ācvā āchvā ājvā ājhvā āñvā āṭvā āṭhvā āḍvā āṛvā āḍhvā āṛhvā āṇvā ātvā āthvā ādvā ādhvā ānvā +_bengali_base,আপ্ব আফ্বা আব্বা আভ্বা আম্বা আয্বা আয়্বা আর্বা আল্বা আশ্বা আষ্বা আস্বা আহ্বা আঃ ।,āpva āphvā ābbā ābhvā āmvā āyvā āẏvā ārvā ālvā āśvā āshvā āsvā āhvā āḥ / +_bengali_base,আঁকা আঁখা আঁগা আঁঘা আন্̐ঙা আঁচা আঁছা আঁজা আঁঝা আন্̐ঞা আঁটা আঁঠা আঁডা আঁড়া আঁঢা আঁঢ়া অন্̐ণ আঁতা আঁথা আঁদা আঁধা,ān̐kā ān̐khā ān̐gā ān̐ghā ān̐ṅā ān̐cā ān̐chā ān̐jā 
ān̐jhā ān̐ñā ān̐ṭā ān̐ṭhā ān̐ḍā ān̐ṛā ān̐ḍhā ān̐ṛhā an̐ṇa ān̐tā ān̐thā ān̐dā ān̐dhā +_bengali_base,অন্̐ন আন্̐না ঐন্̐নৈ ঔন্̐নৌ ঈন্̐নী ইন্̐নি ঊন্̐নূ উন্̐নু ঋন্̐নৃ ৠন্̐নৄ ঌন্̐নৢ ৡন্̐নৣ ওন্̐নো এন্̐নে আঁপা আঁফা আঁবা আঁভা আঁমা আঁযা আঁয়া আঁরা আঁলা আঁশা আঁষা আঁসা আঁহা ॥,an̐na ān̐nā ain̐nai aun̐nau īn̐nī in̐ni ūn̐nū un̐nu r̥n̐nr̥ r̥̄n̐nr̥̄ l̥n̐nl̥ l̥̄n̐nl̥̄ on̐no en̐ne ām̐pā ām̐phā ām̐bā ām̐bhā ām̐mā ām̐yā ām̐ẏā ām̐rā ām̐lā ām̐śā ām̐shā ām̐sā ām̐hā // +_bengali_base,ক কা কি কী কু কূ কৃ কৄ কৢ কৣ কৢ কে কৈ কো কৌ ০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ৽ ঽ ঽঽ,anna ka kā ki kī ku kū kr̥ kr̥̄ kl̥ kl̥̄ kl̥ ke kai ko kau 0 1 2 3 4 5 6 7 8 9 * ' '' diff --git a/test/data/script_samples/_ethiopic_base.csv b/test/data/script_samples/_ethiopic_base.csv new file mode 100644 index 0000000..dfa23fa --- /dev/null +++ b/test/data/script_samples/_ethiopic_base.csv @@ -0,0 +1,41 @@ +_ethiopic_base,ሀ ሀ ሁ ሁ ሂ ሂ ሃ ሃ ሄ ሄ ህ ህ ሆ ሆ ህ‍ ከህ‍,Ha ha Hu hu Hi hi Hā hā Hé hé He he Ho ho H kah +_ethiopic_base,ለ ለ ሉ ሉ ሊ ሊ ላ ላ ሌ ሌ ል ል ሎ ሎ ል‍ ከል‍ /,La la Lu lu Li li Lā lā Lé lé Le le Lo lo L kal / +_ethiopic_base,ሐ ሐ ሑ ሑ ሒ ሒ ሓ ሓ ሔ ሔ ሕ ሕ ሖ ሖ ሕ‍ ከሕ‍ / ,Ḥa ḥa Ḥu ḥu Ḥi ḥi Ḥā ḥā Ḥé ḥé Ḥe ḥe Ḥo ḥo Ḥ kaḥ / +_ethiopic_base,መ መ ሙ ሙ ሚ ሚ ማ ማ ሜ ሜ ም ም ሞ ሞ ም‍ ከም‍ / ,Ma ma Mu mu Mi mi Mā mā Mé mé Me me Mo mo M kam / +_ethiopic_base,ሠ ሠ ሡ ሡ ሢ ሢ ሣ ሣ ሤ ሤ ሥ ሥ ሦ ሦ ሥ‍ ከሥ‍ / ,Śa śa Śu śu Śi śi Śā śā Śé śé Śe śe Śo śo Ś kaś / +_ethiopic_base,ረ ረ ሩ ሩ ሪ ሪ ራ ራ ሬ ሬ ር ር ሮ ሮ ር‍ ከር‍ / ,Ra ra Ru ru Ri Ri Rā rā Ré ré Re re Ro ro R kar / +_ethiopic_base,ሰ ሰ ሱ ሱ ሲ ሲ ሳ ሳ ሴ ሴ ስ ስ ሶ ሶ ስ‍ ከስ‍ / ,Sa sa Su su Si si Sā sā Sé sé Se se So so S kas / +_ethiopic_base,ሸ ሸ ሹ ሹ ሺ ሺ ሻ ሻ ሼ ሼ ሽ ሽ ሾ ሾ ሽ‍ ከሽ‍ /,Ša ša Šu šu Ši ši Šā šā Šé šé Še še Šo šo Š kaš / +_ethiopic_base,ቀ ቀ ቁ ቁ ቂ ቂ ቃ ቃ ቄ ቄ ቅ ቅ ቆ ቆ ቅ‍ ከቅ‍ / ,Qa qa Qu qu Qi qi Qā qā Qé qé Qe qe Qo qo Q kaq / +_ethiopic_base,በ በ ቡ ቡ ቢ ቢ ባ ባ ቤ ቤ ብ ብ ቦ ቦ ብ‍ ከብ‍ / ,Ba ba Bu bu Bi bi Bā bā Bé bé Be be Bo bo B kab / +_ethiopic_base,ተ ተ ቱ ቱ ቲ ቲ ታ ታ ቴ ቴ ት ት ቶ ቶ ት‍ ከት‍ / ,Ta ta Tu tu Ti ti Tā tā Té té Te te To to T kat / 
+_ethiopic_base,ቸ ቸ ቹ ቹ ቺ ቺ ቻ ቻ ቼ ቼ ች ች ቾ ቾ ች‍ ች‍ / ,Ča ča Ču ču Či či Čā čā Čé čé Če če Čo čo Č č / +_ethiopic_base,ኀ ኀ ኁ ኁ ኂ ኂ ኃ ኃ ኄ ኄ ኅ ኅ ኆ ኆ ኅ‍ ኅ‍ / ,H̲a h̲a H̲u h̲u H̲i h̲i H̲ā h̲ā H̲é h̲é H̲e h̲e H̲o h̲o H̲ h̲ / +_ethiopic_base,ነ ነ ኑ ኑ ኒ ኒ ና ና ኔ ኔ ን ን ኖ ኖ ን‍ ን‍ / ,Na na Nu nu Ni ni Nā nā Né né Ne ne No no N n / +_ethiopic_base,ኘ ኘ ኙ ኙ ኚ ኚ ኛ ኛ ኜ ኜ ኝ ኝ ኞ ኞ ኝ‍ ኝ‍ / ,Ña ña Ñu ñu Ñi ñi Ñā ñā Ñé ñé Ñe ñe Ño ño Ñ ñ / +_ethiopic_base,አ አ ኡ ኡ ኢ ኢ ኣ ኣ ኤ ኤ እ እ ኦ ኦ / ,ʼA ʼa ʼU ʼu ʼI ʼi ʼĀ ʼā ʼÉ ʼé ʼE ʼe ʼO ʼo / +_ethiopic_base,ከ ከ ኩ ኩ ኪ ኪ ካ ካ ኬ ኬ ክ ክ ኮ ኮ ክ‍ ክ‍ / ,Ka ka Ku ku Ki ki Kā kā Ké ké Ke ke Ko ko K k / +_ethiopic_base,ኸ ኸ ኹ ኹ ኺ ኺ ኻ ኻ ኼ ኼ ኽ ኽ ኾ ኾ ኽ‍ ኽ‍ ኧ ኧ /,Xa xa Xu xu Xi xi Xā xā Xé xé Xe xe Xo xo X x / +_ethiopic_base,ዉ ዉ ዊ ዊ ዋ ዋ ዌ ዌ ው ው ዎ ዎ ው‍ ው‍ / ,Wa wa Wu wu Wi wi Wā wā Wé wé We we Wo wo W w / +_ethiopic_base,ዐ ዐ ዑ ዑ ዒ ዒ ዔ ዔ ዕ ዕ ዖ ዖ / ,ʻA ʻa ʻU ʻu ʻI ʻi ʻÉ ʻé ʻE ʻe ʻO ʻo / +_ethiopic_base,ዘ ዘ ዙ ዙ ዚ ዚ ዛ ዛ ዜ ዜ ዝ ዝ ዞ ዞ ዝ‍ ዝ‍ / ,Za za Zu zu Zi zi Zā zā Zé zé Ze ze Zo zo Z z / +_ethiopic_base,ዠ ዠ ዡ ዡ ዢ ዢ ዣ ዣ ዤ ዤ ዥ ዥ ዦ ዦ ዥ‍ ዥ‍ / ,Ža ža Žu žu Ži ži Žā žā Žé žé Že že Žo žo Ž ž / +_ethiopic_base,የ የ ዩ ዩ ዪ ዪ ያ ያ ዬ ዬ ይ ይ ዮ ዮ ይ‍ ይ‍ / ,Ya ya Yu yu Yi yi Yā yā Yé yé Ye ye Yo yo Y y / +_ethiopic_base,ደ ደ ዱ ዱ ዲ ዲ ዳ ዳ ዴ ዴ ድ ድ ዶ ዶ ድ‍ ድ‍ / ,Da da Du du Di di Dā dā Dé dé De de Do do D d / +_ethiopic_base,ጀ ጀ ጁ ጁ ጂ ጂ ጃ ጃ ጄ ጄ ጅ ጅ ጆ ጆ ጅ‍ ጅ‍ / ,Ǧa ǧa Ǧu ǧu Ǧi ǧi Ǧā ǧā Ǧé ǧé Ǧe ǧe Ǧo ǧo Ǧ ǧ / +_ethiopic_base,ገ ገ ጉ ጉ ጊ ጊ ጋ ጋ ጌ ጌ ግ ግ ጎ ጎ ግ‍ ግ‍ / ,Ga ga Gu gu Gi gi Gā gā Gé gé Ge ge Go go G g / +_ethiopic_base,ጠ ጠ ጡ ጡ ጢ ጢ ጣ ጣ ጤ ጤ ጥ ጥ ጦ ጦ ጥ‍ ጥ‍ / ,Ṭa ṭa Ṭu ṭu Ṭi ṭi Ṭā ṭā Ṭé ṭé Ṭe ṭe Ṭo ṭo Ṭ ṭ / +_ethiopic_base,ጨ ጨ ጩ ጩ ጪ ጪ ጫ ጫ ጬ ጬ ጭ ጭ ጮ ጮ ጭ‍ ጭ‍ / ,Ċa ċa Ċu ċu Ċi ċi Ċā ċā Ċé ċé Ċe ċe Ċo ċo Ċ ċ / +_ethiopic_base,ጰ ጰ ጱ ጱ ጲ ጲ ጳ ጳ ጴ ጴ ጵ ጵ ጶ ጶ ጵ‍ ጵ‍ / ,P̣a p̣a P̣u p̣u P̣i p̣i P̣ā p̣ā P̣é p̣é P̣e p̣e P̣o p̣o P̣ p̣ / +_ethiopic_base,ጸ ጸ ጹ ጹ ጺ ጺ ጻ ጻ ጼ ጼ ጽ ጽ ጾ ጾ ጽ‍ ጽ‍ / ,Ṣa ṣa Ṣu ṣu Ṣi ṣi Ṣā ṣā Ṣé ṣé Ṣe ṣe Ṣo ṣo Ṣ ṣ / +_ethiopic_base,ፀ ፀ ፁ ፁ ፂ ፂ ፃ ፃ ፄ ፄ ፅ ፅ ፆ ፆ ፅ‍ ፅ‍ / ,Ṡa ṡa Ṡu ṡu Ṡi ṡi Ṡā ṡā Ṡé 
ṡé Ṡe ṡe Ṡo ṡo Ṡ ṡ / +_ethiopic_base,ፈ ፈ ፉ ፉ ፊ ፊ ፋ ፋ ፌ ፌ ፍ ፍ ፎ ፎ ፍ‍ ፍ‍ / ,Fa fa Fu fu Fi fi Fā Fā Fé fé Fe fe Fo fo F f / +_ethiopic_base,ፐ ፐ ፑ ፑ ፒ ፒ ፓ ፓ ፔ ፔ ፕ ፕ ፖ ፖ ፕ‍ ፕ‍ / ,Pa pa Pu pu Pi pi Pā pā Pé pé Pe pe Po po P p / +_ethiopic_base,ቨ ቨ ቩ ቩ ቪ ቪ ቫ ቫ ቬ ቬ ቭ ቭ ቮ ቮ ቭ‍ ቭ‍ / ,Va va Vu vu Vi vi Vā vā Vé vé Ve ve Vo vo V v / +_ethiopic_base,ቈ ቈ ቊ ቊ ቋ ቋ ቌ ቌ ቍ ቍ / ,Qwa qwa Qwi qwi Qwā qwā Qwé qwé Qwe qwe / +_ethiopic_base,ኈ ኈ ኊ ኊ ኋ ኋ ኌ ኌ ኍ ኍ / ,H̲wa h̲wa H̲wi h̲wi H̲wā h̲wā H̲wé h̲wé H̲we h̲we / +_ethiopic_base,ኰ ኰ ኲ ኲ ኳ ኳ ኴ ኴ ኵ ኵ / ,Kwa kwa Kwi kwi Kwā kwā Kwé kwé Kwe kwe / +_ethiopic_base,ጐ ጐ ጒ ጒ (ጓ ጓ) ጔ ጔ ጕ ጕ /,Gwa gwa Gwi gwi (Gwā gwā) Gwé gwé Gwe gwe / +_ethiopic_base,ሏ ሏ ቧ ቧ ዟ ዟ (ጧ ጧ) ሟ ሟ ቷ ቷ ዧ ዧ (ጯ ጯ) ሯ ሯ ቿ ቿ ዯ ዯ /,Lwa lwa Bwa bwa Zwa zwa (Ṭwa ṭwa) Mwa mwa Twa twa Žwa žwa (Ċwa ċwa) Rwa rwa Čwa čwa Ywa ywa / +_ethiopic_base,(ጿ ጿ) ሷ ሷ ኗ ኗ ዷ ዷ (ፏ ፏ) ሿ ሿ ኟ ኟ ጇ ጇ ፘ ፘ ፙ ፙ ፚ ፚ / ,(Ṣwa ṣwa) Swa swa Nwa nwa Dwa dwa (Fwa fwa) Šwa šwa Ñwa ñwa Ǧwa ǧwa Rya rya Mya mya Fya fya / +_ethiopic_base,፩ ፪ ፫ ፬ ፭ ፮ ፯ ፰ ፱ ፲ ፳ ፴ ፵ ፶ ፷ ፸ ፹ ፺ ፻ ፼,1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100 10000 diff --git a/test/data/script_samples/_gurmukhi_base.csv b/test/data/script_samples/_gurmukhi_base.csv new file mode 100644 index 0000000..1680276 --- /dev/null +++ b/test/data/script_samples/_gurmukhi_base.csv @@ -0,0 +1,14 @@ +_gurmukhi_base,ਅਭਿਨੰਦਨ ਗ੍ਰੰਥ ਸਿਨ੍ਘ ਕੰਵਲ ਸੰਪਾਦ੍ਕ ਸੇਖੋਂ ਪੰਜਾਬੀ ਭੰਵਰ ਦਵਿੰਦਰ ਸ਼ੌਂਕੀ ਕਾਨ੍ਗ ਅੰਮ੍ਰਿਤਸਰ ਮਿਲਾਂਗੇ ਆਰਾਂ ਮੰਡਲ ਪਾਣੀਆਂ ਰੰਧਾਵ ਪੰਜ ਕਹ੍ਨਿਅਨ੍ ਛੋਨਿਵਿਅਨ੍ ਟੁੱਟੇ,Abhinandana grantha Singha Kam̆wala sampādka Sekhoṃ Pañjābī Bham̆wara dawindara Shauṅkī Kānga Ammritasara milāṅge Ārāṃ maṇḍala pāṇīāṃ Randhāwa pañja kahnian chonivian ṭuṭṭe +_gurmukhi_base,ਅਈ ਐ ਅਊ ਔ ਆ ਅ ਈ ਇ ਉ ਏ ਓ ੶ ੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯,aī ai aū au ā a ī i u e o ' 0 1 2 3 4 5 6 7 8 9 +_gurmukhi_base,ੱਸ਼੍ ਸ਼੍ ੱਸ੍ ਸ੍ ੱਹ੍ ਹ੍ ੱਕ੍ ਕ੍ ੱਖ੍ ਖ੍ ਖ਼੍ ੱਗ੍ ਗ੍ ਗ਼੍ ੱਘ੍ ਘ੍ ਙ੍ ੱਕ਼੍ ਕ਼੍ ੱਚ੍ ਚ੍ ੱਛ੍ ਛ੍ ੱਜ੍ ਜ੍ ੱਝਝ੍ ਝ੍ ਞ੍ ੱਠ੍ ੱਟ੍ ਟ੍ ਠ੍ ੱਢ੍ ੱਡ੍ ਡ੍ ਢ੍ ਣ੍ ੱਤ੍ ਤ੍ ੱਥ੍ ਥ੍ ੱਦ੍ ਦ੍ ੱਧਧ੍ ਧ੍ ਨ੍ ੱਪ੍ ਪ੍ ੱਫਫ੍ ਫ੍ ੱਫ਼੍ ਫ਼੍ ੱਬ੍ ਬ੍ ੱਭਭ੍ ਭ੍ ਮ੍ ੱਯ੍ ਯ੍ ੱਰ੍ ਰ੍ 
ੱਲ਼੍ ਲ਼੍ ੱਲ੍ ਲ੍ ੱਵ੍ ਵ੍ ੱੜ੍ ੜ੍,ssh sh ss s hh h kk k kkh kh k̲h̲ gg g g̲h̲ ggh gh ṅ qq q cc c cch ch jj j jjh jh ñ ṭṭh ṭṭ ṭ ṭh ḍḍh ḍḍ ḍ ḍh ṇ tt t tth th dd d ddh dh n pp p pph ph ff f bb b bbh bh m yy y rr r ḷḷ ḷ ll l ww w ṛṛ ṛ +_gurmukhi_base,ਕ ਕਾ ਕੈ ਕਈ ਕੌ ਕਊ ਕੇ ਕਿ ਕੀ ਕੋ ਕੁ ਕੂ ੧ਓਮ੍੶ ੶ ਕਂ ਕੋੰ ਮਃ,ka kā kai kaī kau kaū ke ki kī ko ku kū 1Om* * kaṃ kom̆ maḥ +_gurmukhi_base,ਸ਼ ਸ ਹ ਕ ਖ ਖ਼ ਗ ਗ਼ ਘ ਙ ਕ਼ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਫ਼ ਬ ਭ ਮ ਯ ਰ ਲ਼ ਲ ਵ ੜ,sha sa ha ka kha k̲h̲a ga g̲h̲a gha ṅa qa ca cha ja jha ña ṭa ṭha ḍa ḍha ṇa ta tha da dha na pa pha fa ba bha ma ya ra ḷa la wa ṛa +_gurmukhi_base,ਸ਼ਾ ਸਾ ਹਾ ਕਾ ਖਾ ਖ਼ਾ ਗਾ ਗ਼ਾ ਘਾ ਙਾ ਕ਼ਾ ਚਾ ਛਾ ਜਾ ਝਾ ਞਾ ਟਾ ਠਾ ਡਾ ਢਾ ਣਾ ਤਾ ਥਾ ਦਾ ਧਾ ਨਾ ਪਾ ਫਾ ਫ਼ਾ ਬਾ ਭਾ ਮਾ ਯਾ ਰਾ ਲ਼ਾ ਲਾ ਵਾ ੜਾ,shā sā hā kā khā k̲h̲ā gā g̲h̲ā ghā ṅā qā cā chā jā jhā ñā ṭā ṭhā ḍā ḍhā ṇā tā thā dā dhā nā pā phā fā bā bhā mā yā rā ḷā lā wā ṛā +_gurmukhi_base,ਸ਼ੈ ਸੈ ਹੈ ਕੈ ਖੈ ਖ਼ੈ ਗੈ ਗ਼ੈ ਘੈ ਙੈ ਕ਼ੈ ਚੈ ਛੈ ਜੈ ਝੈ ਞੈ ਟੈ ਠੈ ਡੈ ਢੈ ਣੈ ਤੈ ਥੈ ਦੈ ਧੈ ਨੈ ਪੈ ਫੈ ਫ਼ੈ ਬੈ ਭੈ ਮੈ ਯੈ ਰੈ ਲ਼ੈ ਲੈ ਵੈ ੜੈ,shai sai hai kai khai k̲h̲ai gai g̲h̲ai ghai ṅai qai cai chai jai jhai ñai ṭai ṭhai ḍai ḍhai ṇai tai thai dai dhai nai pai phai fai bai bhai mai yai rai ḷai lai wai ṛai +_gurmukhi_base,ਸ਼ੌ ਸੌ ਹੌ ਕੌ ਖੌ ਖ਼ੌ ਗੌ ਗ਼ੌ ਘੌ ਙੌ ਕ਼ੌ ਚੌ ਛੌ ਜੌ ਝੌ ਞੌ ਟੌ ਠੌ ਡੌ ਢੌ ਣੌ ਤੌ ਥੌ ਦੌ ਧੌ ਨੌ ਪੌ ਫੌ ਫ਼ੌ ਬੌ ਭੌ ਮੌ ਯੌ ਰੌ ਲ਼ੌ ਲੌ ਵੌ ੜੌ,shau sau hau kau khau k̲h̲au gau g̲h̲au ghau ṅau qau cau chau jau jhau ñau ṭau ṭhau ḍau ḍhau ṇau tau thau dau dhau nau pau phau fau bau bhau mau yau rau ḷau lau wau ṛau +_gurmukhi_base,ਸ਼ੇ ਸੇ ਹੇ ਕੇ ਖੇ ਖ਼ੇ ਗੇ ਗ਼ੇ ਘੇ ਙੇ ਕ਼ੇ ਚੇ ਛੇ ਜੇ ਝੇ ਞੇ ਟੇ ਠੇ ਡੇ ਢੇ ਣੇ ਤੇ ਥੇ ਦੇ ਧੇ ਨੇ ਪੇ ਫੇ ਫ਼ੇ ਬੇ ਭੇ ਮੇ ਯੇ ਰੇ ਲ਼ੇ ਲੇ ਵੇ ੜੇ,she se he ke khe k̲h̲e ge g̲h̲e ghe ṅe qe ce che je jhe ñe ṭe ṭhe ḍe ḍhe ṇe te the de dhe ne pe phe fe be bhe me ye re ḷe le we ṛe +_gurmukhi_base,ਸ਼ਿ ਸਿ ਹਿ ਕਿ ਖਿ ਖ਼ਿ ਗਿ ਗ਼ਿ ਘਿ ਙਿ ਕ਼ਿ ਚਿ ਛਿ ਜਿ ਝਿ ਞਿ ਟਿ ਠਿ ਡਿ ਢਿ ਣਿ ਤਿ ਥਿ ਦਿ ਧਿ ਨਿ ਪਿ ਫਿ ਫ਼ਿ ਬਿ ਭਿ ਮਿ ਯਿ ਰਿ ਲ਼ਿ ਲਿ ਵਿ ੜਿ,shi si hi ki khi k̲h̲i gi g̲h̲i ghi ṅi qi ci chi ji jhi ñi ṭi ṭhi ḍi ḍhi ṇi ti thi di dhi ni pi phi fi bi bhi mi yi ri ḷi li wi ṛi 
+_gurmukhi_base,ਸ਼ੀ ਸੀ ਹੀ ਕੀ ਖੀ ਖ਼ੀ ਗੀ ਗ਼ੀ ਘੀ ਙੀ ਕ਼ੀ ਚੀ ਛੀ ਜੀ ਝੀ ਞੀ ਟੀ ਠੀ ਡੀ ਢੀ ਣੀ ਤੀ ਥੀ ਦੀ ਧੀ ਨੀ ਪੀ ਫੀ ਫ਼ੀ ਬੀ ਭੀ ਮੀ ਯੀ ਰੀ ਲ਼ੀ ਲੀ ਵੀ ੜੀ,shī sī hī kī khī k̲h̲ī gī g̲h̲ī ghī ṅī qī cī chī jī jhī ñī ṭī ṭhī ḍī ḍhī ṇī tī thī dī dhī nī pī phī fī bī bhī mī yī rī ḷī lī wī ṛī +_gurmukhi_base,ਸ਼ੋ ਸੋ ਹੋ ਕੋ ਖੋ ਖ਼ੋ ਗੋ ਗ਼ੋ ਘੋ ਙੋ ਕ਼ੋ ਚੋ ਛੋ ਜੋ ਝੋ ਞੋ ਟੋ ਠੋ ਡੋ ਢੋ ਣੋ ਤੋ ਥੋ ਦੋ ਧੋ ਨੋ ਪੋ ਫੋ ਫ਼ੋ ਬੋ ਭੋ ਮੋ ਯੋ ਰੋ ਲ਼ੋ ਲੋ ਵੋ ੜੋ,sho so ho ko kho k̲h̲o go g̲h̲o gho ṅo qo co cho jo jho ño ṭo ṭho ḍo ḍho ṇo to tho do dho no po pho fo bo bho mo yo ro ḷo lo wo ṛo +_gurmukhi_base,ਸ਼ੁ ਸੁ ਹੁ ਕੁ ਖੁ ਖ਼ੁ ਗੁ ਗ਼ੁ ਘੁ ਙੁ ਕ਼ੁ ਚੁ ਛੁ ਜੁ ਝੁ ਞੁ ਟੁ ਠੁ ਡੁ ਢੁ ਣੁ ਤੁ ਥੁ ਦੁ ਧੁ ਨੁ ਪੁ ਫੁ ਫ਼ੁ ਬੁ ਭੁ ਮੁ ਯੁ ਰੁ ਲ਼ੁ ਲੁ ਵੁ ੜੁ,shu su hu ku khu k̲h̲u gu g̲h̲u ghu ṅu qu cu chu ju jhu ñu ṭu ṭhu ḍu ḍhu ṇu tu thu du dhu nu pu phu fu bu bhu mu yu ru ḷu lu wu ṛu +_gurmukhi_base,ਸ਼ੂ ਸੂ ਹੂ ਕੂ ਖੂ ਖ਼ੂ ਗੂ ਗ਼ੂ ਘੂ ਙੂ ਕ਼ੂ ਚੂ ਛੂ ਜੂ ਝੂ ਞੂ ਟੂ ਠੂ ਡੂ ਢੂ ਣੂ ਤੂ ਥੂ ਦੂ ਧੂ ਨੂ ਪੂ ਫੂ ਫ਼ੂ ਬੂ ਭੂ ਮੂ ਯੂ ਰੂ ਲ਼ੂ ਲੂ ਵੂ ੜੂ,shū sū hū kū khū k̲h̲ū gū g̲h̲ū ghū ṅū qū cū chū jū jhū ñū ṭū ṭhū ḍū ḍhū ṇū tū thū dū dhū nū pū phū fū bū bhū mū yū rū ḷū lū wū ṛū diff --git a/test/data/script_samples/abazin_cyrillic.csv b/test/data/script_samples/abazin_cyrillic.csv new file mode 100644 index 0000000..c47d595 --- /dev/null +++ b/test/data/script_samples/abazin_cyrillic.csv @@ -0,0 +1,2 @@ +abazin_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Ӏ ӏ Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g Ḣ ḣ I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z +abazin_cyrillic,Ḣ ḣ, diff --git a/test/data/script_samples/abkhaz_cyrillic.csv b/test/data/script_samples/abkhaz_cyrillic.csv new file mode 100644 index 0000000..536bf68 --- /dev/null +++ b/test/data/script_samples/abkhaz_cyrillic.csv @@ -0,0 +1,2 @@ +abkhaz_cyrillic, А а Б б Ч Ч ч 
Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Џ џ Ҕ Ҕ ҕ Ҝ ҝ Ҩ ҩ Ҵ ҵ Ҷ Ҷ ҷ Ҽ Ҽ ҽ Ҿ Ҿ ҿ Ӄ Ӄ ӄ Ә ә Ӡ Ӡ ӡ Ӽ ӽ Ԥ Ԥ ԥ, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z J j GH Gh gh Q q W w T͡Ṡ t͡ṡ ĊH Ċh ċh ĆH Ćh ćh ĆH́ Ćh́ ćh́ KH Kh kh Ẇ ẇ D͡Z D͡z d͡z Ḣ ḣ PH Ph ph +abkhaz_cyrillic, Џ џ Ҕ Ҕ ҕ Ҝ ҝ Ҩ ҩ Ҵ ҵ Ҷ Ҷ ҷ Ҽ Ҽ ҽ Ҿ Ҿ ҿ Ӄ Ӄ ӄ Ә ә Ӡ Ӡ ӡ Ӽ ӽ Ԥ Ԥ ԥ,J j GH Gh gh Q q W w T͡Ṡ t͡ṡ ĊH Ċh ċh ĆH Ćh ćh ĆH́ Ćh́ ćh́ KH Kh kh Ẇ ẇ D͡Z D͡z d͡z Ḣ ḣ PH Ph ph diff --git a/test/data/script_samples/adygei_cyrillic.csv b/test/data/script_samples/adygei_cyrillic.csv new file mode 100644 index 0000000..1143d3a --- /dev/null +++ b/test/data/script_samples/adygei_cyrillic.csv @@ -0,0 +1,2 @@ +adygei_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ӏ ӏ, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ḣ ḣ +adygei_cyrillic,Ӏ ӏ,Ḣ ḣ diff --git a/test/data/script_samples/altai_cyrillic.csv b/test/data/script_samples/altai_cyrillic.csv new file mode 100644 index 0000000..830158d --- /dev/null +++ b/test/data/script_samples/altai_cyrillic.csv @@ -0,0 +1,2 @@ +altai_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ң Ң Ң Ң ң ң Ԃ ԃ Ө ө Ү ү, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s 
T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z N︠G︡ N͡G N︠g︡ N͡g n͡g n︠g︡ D́ d́ Ȯ ȯ U̇ u̇ +altai_cyrillic,Ң Ң Ң Ң ң ң Ԃ ԃ Ө ө Ү ү,N︠G︡ N͡G N︠g︡ N͡g n͡g n︠g︡ D́ d́ Ȯ ȯ U̇ u̇ diff --git a/test/data/script_samples/amharic.csv b/test/data/script_samples/amharic.csv new file mode 100644 index 0000000..d80fda9 --- /dev/null +++ b/test/data/script_samples/amharic.csv @@ -0,0 +1,41 @@ +amharic,ሀ ሀ ሁ ሁ ሂ ሂ ሃ ሃ ሄ ሄ ህ ህ ሆ ሆ ህ‍ ከህ‍,Ha ha Hu hu Hi hi Hā hā Hé hé He he Ho ho H kah +amharic,ለ ለ ሉ ሉ ሊ ሊ ላ ላ ሌ ሌ ል ል ሎ ሎ ል‍ ከል‍ /,La la Lu lu Li li Lā lā Lé lé Le le Lo lo L kal / +amharic,ሐ ሐ ሑ ሑ ሒ ሒ ሓ ሓ ሔ ሔ ሕ ሕ ሖ ሖ ሕ‍ ከሕ‍ / ,Ḥa ḥa Ḥu ḥu Ḥi ḥi Ḥā ḥā Ḥé ḥé Ḥe ḥe Ḥo ḥo Ḥ kaḥ / +amharic,መ መ ሙ ሙ ሚ ሚ ማ ማ ሜ ሜ ም ም ሞ ሞ ም‍ ከም‍ / ,Ma ma Mu mu Mi mi Mā mā Mé mé Me me Mo mo M kam / +amharic,ሠ ሠ ሡ ሡ ሢ ሢ ሣ ሣ ሤ ሤ ሥ ሥ ሦ ሦ ሥ‍ ከሥ‍ / ,Śa śa Śu śu Śi śi Śā śā Śé śé Śe śe Śo śo Ś kaś / +amharic,ረ ረ ሩ ሩ ሪ ሪ ራ ራ ሬ ሬ ር ር ሮ ሮ ር‍ ከር‍ / ,Ra ra Ru ru Ri Ri Rā rā Ré ré Re re Ro ro R kar / +amharic,ሰ ሰ ሱ ሱ ሲ ሲ ሳ ሳ ሴ ሴ ስ ስ ሶ ሶ ስ‍ ከስ‍ / ,Sa sa Su su Si si Sā sā Sé sé Se se So so S kas / +amharic,ሸ ሸ ሹ ሹ ሺ ሺ ሻ ሻ ሼ ሼ ሽ ሽ ሾ ሾ ሽ‍ ከሽ‍ /,Ša ša Šu šu Ši ši Šā šā Šé šé Še še Šo šo Š kaš / +amharic,ቀ ቀ ቁ ቁ ቂ ቂ ቃ ቃ ቄ ቄ ቅ ቅ ቆ ቆ ቅ‍ ከቅ‍ / ,Qa qa Qu qu Qi qi Qā qā Qé qé Qe qe Qo qo Q kaq / +amharic,በ በ ቡ ቡ ቢ ቢ ባ ባ ቤ ቤ ብ ብ ቦ ቦ ብ‍ ከብ‍ / ,Ba ba Bu bu Bi bi Bā bā Bé bé Be be Bo bo B kab / +amharic,ተ ተ ቱ ቱ ቲ ቲ ታ ታ ቴ ቴ ት ት ቶ ቶ ት‍ ከት‍ / ,Ta ta Tu tu Ti ti Tā tā Té té Te te To to T kat / +amharic,ቸ ቸ ቹ ቹ ቺ ቺ ቻ ቻ ቼ ቼ ች ች ቾ ቾ ች‍ ች‍ / ,Ča ča Ču ču Či či Čā čā Čé čé Če če Čo čo Č č / +amharic,ኀ ኀ ኁ ኁ ኂ ኂ ኃ ኃ ኄ ኄ ኅ ኅ ኆ ኆ ኅ‍ ኅ‍ / ,H̲a h̲a H̲u h̲u H̲i h̲i H̲ā h̲ā H̲é h̲é H̲e h̲e H̲o h̲o H̲ h̲ / +amharic,ነ ነ ኑ ኑ ኒ ኒ ና ና ኔ ኔ ን ን ኖ ኖ ን‍ ን‍ / ,Na na Nu nu Ni ni Nā nā Né né Ne ne No no N n / +amharic,ኘ ኘ ኙ ኙ ኚ ኚ ኛ ኛ ኜ ኜ ኝ ኝ ኞ ኞ ኝ‍ ኝ‍ / ,Ña ña Ñu ñu Ñi ñi Ñā ñā Ñé ñé Ñe ñe Ño ño Ñ ñ / +amharic,አ አ ኡ ኡ ኢ ኢ ኣ ኣ ኤ ኤ እ እ ኦ ኦ / ,ʼA ʼa ʼU ʼu ʼI ʼi ʼĀ ʼā ʼÉ ʼé ʼE ʼe ʼO ʼo / +amharic,ከ ከ ኩ ኩ ኪ ኪ ካ ካ ኬ ኬ ክ ክ ኮ ኮ ክ‍ ክ‍ / ,Ka ka Ku ku Ki ki Kā kā Ké 
ké Ke ke Ko ko K k / +amharic,ኸ ኸ ኹ ኹ ኺ ኺ ኻ ኻ ኼ ኼ ኽ ኽ ኾ ኾ ኽ‍ ኽ‍ ኧ ኧ /,Xa xa Xu xu Xi xi Xā xā Xé xé Xe xe Xo xo X x / +amharic,ዉ ዉ ዊ ዊ ዋ ዋ ዌ ዌ ው ው ዎ ዎ ው‍ ው‍ / ,Wa wa Wu wu Wi wi Wā wā Wé wé We we Wo wo W w / +amharic,ዐ ዐ ዑ ዑ ዒ ዒ ዔ ዔ ዕ ዕ ዖ ዖ / ,ʻA ʻa ʻU ʻu ʻI ʻi ʻÉ ʻé ʻE ʻe ʻO ʻo / +amharic,ዘ ዘ ዙ ዙ ዚ ዚ ዛ ዛ ዜ ዜ ዝ ዝ ዞ ዞ ዝ‍ ዝ‍ / ,Za za Zu zu Zi zi Zā zā Zé zé Ze ze Zo zo Z z / +amharic,ዠ ዠ ዡ ዡ ዢ ዢ ዣ ዣ ዤ ዤ ዥ ዥ ዦ ዦ ዥ‍ ዥ‍ / ,Ža ža Žu žu Ži ži Žā žā Žé žé Že že Žo žo Ž ž / +amharic,የ የ ዩ ዩ ዪ ዪ ያ ያ ዬ ዬ ይ ይ ዮ ዮ ይ‍ ይ‍ / ,Ya ya Yu yu Yi yi Yā yā Yé yé Ye ye Yo yo Y y / +amharic,ደ ደ ዱ ዱ ዲ ዲ ዳ ዳ ዴ ዴ ድ ድ ዶ ዶ ድ‍ ድ‍ / ,Da da Du du Di di Dā dā Dé dé De de Do do D d / +amharic,ጀ ጀ ጁ ጁ ጂ ጂ ጃ ጃ ጄ ጄ ጅ ጅ ጆ ጆ ጅ‍ ጅ‍ / ,Ǧa ǧa Ǧu ǧu Ǧi ǧi Ǧā ǧā Ǧé ǧé Ǧe ǧe Ǧo ǧo Ǧ ǧ / +amharic,ገ ገ ጉ ጉ ጊ ጊ ጋ ጋ ጌ ጌ ግ ግ ጎ ጎ ግ‍ ግ‍ / ,Ga ga Gu gu Gi gi Gā gā Gé gé Ge ge Go go G g / +amharic,ጠ ጠ ጡ ጡ ጢ ጢ ጣ ጣ ጤ ጤ ጥ ጥ ጦ ጦ ጥ‍ ጥ‍ / ,Ṭa ṭa Ṭu ṭu Ṭi ṭi Ṭā ṭā Ṭé ṭé Ṭe ṭe Ṭo ṭo Ṭ ṭ / +amharic,ጨ ጨ ጩ ጩ ጪ ጪ ጫ ጫ ጬ ጬ ጭ ጭ ጮ ጮ ጭ‍ ጭ‍ / ,Ċa ċa Ċu ċu Ċi ċi Ċā ċā Ċé ċé Ċe ċe Ċo ċo Ċ ċ / +amharic,ጰ ጰ ጱ ጱ ጲ ጲ ጳ ጳ ጴ ጴ ጵ ጵ ጶ ጶ ጵ‍ ጵ‍ / ,P̣a p̣a P̣u p̣u P̣i p̣i P̣ā p̣ā P̣é p̣é P̣e p̣e P̣o p̣o P̣ p̣ / +amharic,ጸ ጸ ጹ ጹ ጺ ጺ ጻ ጻ ጼ ጼ ጽ ጽ ጾ ጾ ጽ‍ ጽ‍ / ,Ṣa ṣa Ṣu ṣu Ṣi ṣi Ṣā ṣā Ṣé ṣé Ṣe ṣe Ṣo ṣo Ṣ ṣ / +amharic,ፀ ፀ ፁ ፁ ፂ ፂ ፃ ፃ ፄ ፄ ፅ ፅ ፆ ፆ ፅ‍ ፅ‍ / ,Ṡa ṡa Ṡu ṡu Ṡi ṡi Ṡā ṡā Ṡé ṡé Ṡe ṡe Ṡo ṡo Ṡ ṡ / +amharic,ፈ ፈ ፉ ፉ ፊ ፊ ፋ ፋ ፌ ፌ ፍ ፍ ፎ ፎ ፍ‍ ፍ‍ / ,Fa fa Fu fu Fi fi Fā Fā Fé fé Fe fe Fo fo F f / +amharic,ፐ ፐ ፑ ፑ ፒ ፒ ፓ ፓ ፔ ፔ ፕ ፕ ፖ ፖ ፕ‍ ፕ‍ / ,Pa pa Pu pu Pi pi Pā pā Pé pé Pe pe Po po P p / +amharic,ቨ ቨ ቩ ቩ ቪ ቪ ቫ ቫ ቬ ቬ ቭ ቭ ቮ ቮ ቭ‍ ቭ‍ / ,Va va Vu vu Vi vi Vā vā Vé vé Ve ve Vo vo V v / +amharic,ቈ ቈ ቊ ቊ ቋ ቋ ቌ ቌ ቍ ቍ / ,Qwa qwa Qwi qwi Qwā qwā Qwé qwé Qwe qwe / +amharic,ኈ ኈ ኊ ኊ ኋ ኋ ኌ ኌ ኍ ኍ / ,H̲wa h̲wa H̲wi h̲wi H̲wā h̲wā H̲wé h̲wé H̲we h̲we / +amharic,ኰ ኰ ኲ ኲ ኳ ኳ ኴ ኴ ኵ ኵ / ,Kwa kwa Kwi kwi Kwā kwā Kwé kwé Kwe kwe / +amharic,ጐ ጐ ጒ ጒ (ጓ ጓ) ጔ ጔ ጕ ጕ /,Gwa gwa Gwi gwi (Gwā gwā) Gwé gwé Gwe gwe / +amharic,ሏ ሏ ቧ ቧ ዟ ዟ (ጧ ጧ) ሟ ሟ ቷ ቷ ዧ ዧ (ጯ ጯ) ሯ ሯ ቿ ቿ ዯ ዯ /,Lwa lwa Bwa bwa 
Zwa zwa (Ṭwa ṭwa) Mwa mwa Twa twa Žwa žwa (Ċwa ċwa) Rwa rwa Čwa čwa Ywa ywa / +amharic,(ጿ ጿ) ሷ ሷ ኗ ኗ ዷ ዷ (ፏ ፏ) ሿ ሿ ኟ ኟ ጇ ጇ ፘ ፘ ፙ ፙ ፚ ፚ / ,(Ṣwa ṣwa) Swa swa Nwa nwa Dwa dwa (Fwa fwa) Šwa šwa Ñwa ñwa Ǧwa ǧwa Rya rya Mya mya Fya fya / +amharic,፩ ፪ ፫ ፬ ፭ ፮ ፯ ፰ ፱ ፲ ፳ ፴ ፵ ፶ ፷ ፸ ፹ ፺ ፻ ፼,1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100 10000 diff --git a/test/data/script_samples/arabic.csv b/test/data/script_samples/arabic.csv index 3e04837..07153f2 100644 --- a/test/data/script_samples/arabic.csv +++ b/test/data/script_samples/arabic.csv @@ -1,3 +1,17 @@ -"arabic","نظام الحكم في عمان : من إمامة الإنتخاب الى السلطنة الوراثية","Niẓām al-ḥukm fī ʻUmān : min imāmat al-intikhāb ilá al-salṭanah al-wirāthīyah","s2r" -"arabic","ندوة علاقات مصر بدول حوض النيل في ظل رئاسة مصر للاتحاد الإفريقي","Nadwat ʻAlāqāt Miṣr bi-Duwal Ḥawḍ al-Nīl fī ẓill Riʼāsat Miṣr lil-Ittiḥād al-Ifrīqī","s2r" -"arabic","تهذيب البيان والجمع في الفرق بين التكليف والوضع","Tahdhīb al-bayān wa-al-jamʻ fī al-farq bayna al-taklīf wa-al-waḍʻ","s2r" +arabic,اللّٰه اللّٰه قرآن لله بلله رحمن رواة هذه ذلك بن حياتي إسحق سه تح فح له مائة معة,Allāh allāh Qurʼān lillāh billāh Raḥmān ruwāt hādhīhi dhālika ibn ḥayātī Isḥaq sʹh tḥ fḥ la-hu miʼah miʻat ,r2s +arabic,الطبعة 1 الطبعة 2 الطبعة 3 الطبعة 4 الطبعة 5 الطبعة 6 الطبعة 7 الطبعة 8 الطبعة 9 ألى,al-ṭabʻah 1 al-ṭabʻah 2 al-ṭabʻah 3 al-ṭabʻah 4 al-ṭabʻah 5 al-ṭabʻah 6 al-ṭabʻah 7 al-ṭabʻah 8 al-ṭabʻah 9 alá,r2s +arabic,النور ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ٠ ١٩٩٩ ٢٠٠٠ ٢٠٢٦ نور,al-nūr 1 2 3 4 5 6 7 8 9 0 1999 2000 2026 nūr,r2s +arabic,والكبير بالكبير للكبير لهب لكبير فيالقدس كقدس,wa-al-kabīr bi-al-kabīr lil-kabīr li-hubb lā-kabīr fī-al-Quds ka-Quds ,r2s +arabic,وضع عوض دلو يد حيل طهي أولى صورة ذو يمان جيل في كتاب سحاب جمان,waḍʻ ʻiwaḍ dalw yad ḥiyal ṭahy ūlá ṣūrah dhū īmān jīl fī kitāb saḥāb jumān ,r2s +arabic,أوج نوم لو أيسر شيخ عيني فعلو ألاعك علما وعملا فاعل رضا,awj nawm law aysar shaykh ʻaynay faʻalū ulāʻika ʻilman wa-ʻamalan fāʻil riḍā,r2s +arabic,حتى مضى كبرى يحيى مسمى مصطفى ردي الدين 
المصري المصرية,ḥattá maḍá kubrá Yaḥyá musammá Muṣṭafá Radī al-Dīn al-Miṣrī al-Miṣrīyah,r2s +arabic,صلاه الرسالة البهية مرعاه أرجوزة في الطب وزارة التربية مرعات الزمان,ṣalāh al-Risālah al-bahīyah mirʻāh Urjūzah fī al-ṭibb Wizārat al-Tarbiyah Mirʻāt al-zamān,r2s +arabic,أسد أنس إذ مسعلة معتمر داعم ملع خطع رحلة بن جبير الإستدراك كتب إقتنتʹها بإهتمام عبد المجيد,asad uns idha masʻalah muʻtamar dāʻim malaʻa khaṭiʻa Riḥlat ibn Jubayr al-istidrāk kutub iqtanatʹhā bi-ihtimām ʻAbd al-Majīd ,r2s +arabic,آلة كلية الآداب تعاليف معاثر خلفاع عدو قوگ شوال صور جو,ālah Kullīyat al-Ādāb taʻālīf maʻāthir khulafāʻ ʻadūw qūwag Shawwāl ṣawwara jaww,r2s +arabic,يام سيد قصي الغز̋ الكشاف قاضن معنا طبعا فجعتا صقعا,ayyām sayyid Quṣayy al-Ghazzi̋ al-Kashshāf qāḍin maʻnan ṭabʻan fajʻatan ṣuqʻan,r2s +arabic,المشترك وضعا والمفترق صقعا ما ولي مصر معرفة ما يجب لهم,al-Mushtarik waḍʻan wa-al-muftariq ṣuqʻan man waliya miṣr maʻrifat mā yajibu la-hum,r2s +arabic,إلى يومنا هذا أنا وأنت هاذه الحال معلفاته وشروحها,ilá yawminā hādhā anā wa-anta hādhihi al-ḥāl muʻallafātuhu wa-shurūḥuhā,r2s +arabic,حياته وعصره توفيق الحكيم أفكاره آثاره أن أنه بين يديه,ḥayātuhu wa-ʻaṣruh Tawfīq al-Ḥakīm afkāruh āthāruh anna annahu bayna yadayhu,r2s +arabic,أدهم أكرمثها قلعه‌جي شيخ‌زادة جارمانوس لورد غرنفيل يساغوجي,Adʹham akramatʹhā qalʻahʹjī shaykhʹzādah Jārmānūs Lūrd Ghranfīl Īsāghūjī,r2s +arabic,طاها ياسين عمر بهجة أحمد بن محمد بن أبي الربيع,Ṭāhā Yāsīn ʻAmr Bahjat Aḥmad ibn Muḥammad ibn Abī al-Rabīʻ ,r2s +arabic,شرح ابن عقيل على ألفية ابن مالك بن خدة بن-عبد اللّٰه مائة,Sharḥ Ibn ʻAqīl ʻalá Alfīyat Ibn Mālik Bin Khiddah Bin-ʻAbd Allāh miʼah,r2s diff --git a/test/data/script_samples/argobba_ethiopic.csv b/test/data/script_samples/argobba_ethiopic.csv new file mode 100644 index 0000000..81f239e --- /dev/null +++ b/test/data/script_samples/argobba_ethiopic.csv @@ -0,0 +1,41 @@ +argobba_ethiopic,ሀ ሀ ሁ ሁ ሂ ሂ ሃ ሃ ሄ ሄ ህ ህ ሆ ሆ ህ‍ ከህ‍,Ha ha Hu hu Hi hi Hā hā Hé hé He he Ho ho H kah +argobba_ethiopic,ለ ለ ሉ ሉ ሊ ሊ ላ ላ ሌ ሌ ል ል ሎ ሎ ል‍ 
ከል‍ /,La la Lu lu Li li Lā lā Lé lé Le le Lo lo L kal / +argobba_ethiopic,ሐ ሐ ሑ ሑ ሒ ሒ ሓ ሓ ሔ ሔ ሕ ሕ ሖ ሖ ሕ‍ ከሕ‍ / ,Ḥa ḥa Ḥu ḥu Ḥi ḥi Ḥā ḥā Ḥé ḥé Ḥe ḥe Ḥo ḥo Ḥ kaḥ / +argobba_ethiopic,መ መ ሙ ሙ ሚ ሚ ማ ማ ሜ ሜ ም ም ሞ ሞ ም‍ ከም‍ / ,Ma ma Mu mu Mi mi Mā mā Mé mé Me me Mo mo M kam / +argobba_ethiopic,ሠ ሠ ሡ ሡ ሢ ሢ ሣ ሣ ሤ ሤ ሥ ሥ ሦ ሦ ሥ‍ ከሥ‍ / ,Śa śa Śu śu Śi śi Śā śā Śé śé Śe śe Śo śo Ś kaś / +argobba_ethiopic,ረ ረ ሩ ሩ ሪ ሪ ራ ራ ሬ ሬ ር ር ሮ ሮ ር‍ ከር‍ / ,Ra ra Ru ru Ri Ri Rā rā Ré ré Re re Ro ro R kar / +argobba_ethiopic,ሰ ሰ ሱ ሱ ሲ ሲ ሳ ሳ ሴ ሴ ስ ስ ሶ ሶ ስ‍ ከስ‍ / ,Sa sa Su su Si si Sā sā Sé sé Se se So so S kas / +argobba_ethiopic,ሸ ሸ ሹ ሹ ሺ ሺ ሻ ሻ ሼ ሼ ሽ ሽ ሾ ሾ ሽ‍ ከሽ‍ /,Ša ša Šu šu Ši ši Šā šā Šé šé Še še Šo šo Š kaš / +argobba_ethiopic,ቀ ቀ ቁ ቁ ቂ ቂ ቃ ቃ ቄ ቄ ቅ ቅ ቆ ቆ ቅ‍ ከቅ‍ / ,Qa qa Qu qu Qi qi Qā qā Qé qé Qe qe Qo qo Q kaq / +argobba_ethiopic,በ በ ቡ ቡ ቢ ቢ ባ ባ ቤ ቤ ብ ብ ቦ ቦ ብ‍ ከብ‍ / ,Ba ba Bu bu Bi bi Bā bā Bé bé Be be Bo bo B kab / +argobba_ethiopic,ተ ተ ቱ ቱ ቲ ቲ ታ ታ ቴ ቴ ት ት ቶ ቶ ት‍ ከት‍ / ,Ta ta Tu tu Ti ti Tā tā Té té Te te To to T kat / +argobba_ethiopic,ቸ ቸ ቹ ቹ ቺ ቺ ቻ ቻ ቼ ቼ ች ች ቾ ቾ ች‍ ች‍ / ,Ča ča Ču ču Či či Čā čā Čé čé Če če Čo čo Č č / +argobba_ethiopic,ኀ ኀ ኁ ኁ ኂ ኂ ኃ ኃ ኄ ኄ ኅ ኅ ኆ ኆ ኅ‍ ኅ‍ / ,H̲a h̲a H̲u h̲u H̲i h̲i H̲ā h̲ā H̲é h̲é H̲e h̲e H̲o h̲o H̲ h̲ / +argobba_ethiopic,ነ ነ ኑ ኑ ኒ ኒ ና ና ኔ ኔ ን ን ኖ ኖ ን‍ ን‍ / ,Na na Nu nu Ni ni Nā nā Né né Ne ne No no N n / +argobba_ethiopic,ኘ ኘ ኙ ኙ ኚ ኚ ኛ ኛ ኜ ኜ ኝ ኝ ኞ ኞ ኝ‍ ኝ‍ / ,Ña ña Ñu ñu Ñi ñi Ñā ñā Ñé ñé Ñe ñe Ño ño Ñ ñ / +argobba_ethiopic,አ አ ኡ ኡ ኢ ኢ ኣ ኣ ኤ ኤ እ እ ኦ ኦ / ,ʼA ʼa ʼU ʼu ʼI ʼi ʼĀ ʼā ʼÉ ʼé ʼE ʼe ʼO ʼo / +argobba_ethiopic,ከ ከ ኩ ኩ ኪ ኪ ካ ካ ኬ ኬ ክ ክ ኮ ኮ ክ‍ ክ‍ / ,Ka ka Ku ku Ki ki Kā kā Ké ké Ke ke Ko ko K k / +argobba_ethiopic,ኸ ኸ ኹ ኹ ኺ ኺ ኻ ኻ ኼ ኼ ኽ ኽ ኾ ኾ ኽ‍ ኽ‍ ኧ ኧ /,Xa xa Xu xu Xi xi Xā xā Xé xé Xe xe Xo xo X x / +argobba_ethiopic,ዉ ዉ ዊ ዊ ዋ ዋ ዌ ዌ ው ው ዎ ዎ ው‍ ው‍ / ,Wa wa Wu wu Wi wi Wā wā Wé wé We we Wo wo W w / +argobba_ethiopic,ዐ ዐ ዑ ዑ ዒ ዒ ዔ ዔ ዕ ዕ ዖ ዖ / ,ʻA ʻa ʻU ʻu ʻI ʻi ʻÉ ʻé ʻE ʻe ʻO ʻo / +argobba_ethiopic,ዘ ዘ ዙ ዙ ዚ ዚ ዛ ዛ ዜ ዜ ዝ ዝ ዞ ዞ ዝ‍ ዝ‍ / ,Za za Zu zu Zi zi Zā zā 
Zé zé Ze ze Zo zo Z z / +argobba_ethiopic,ዠ ዠ ዡ ዡ ዢ ዢ ዣ ዣ ዤ ዤ ዥ ዥ ዦ ዦ ዥ‍ ዥ‍ / ,Ža ža Žu žu Ži ži Žā žā Žé žé Že že Žo žo Ž ž / +argobba_ethiopic,የ የ ዩ ዩ ዪ ዪ ያ ያ ዬ ዬ ይ ይ ዮ ዮ ይ‍ ይ‍ / ,Ya ya Yu yu Yi yi Yā yā Yé yé Ye ye Yo yo Y y / +argobba_ethiopic,ደ ደ ዱ ዱ ዲ ዲ ዳ ዳ ዴ ዴ ድ ድ ዶ ዶ ድ‍ ድ‍ / ,Da da Du du Di di Dā dā Dé dé De de Do do D d / +argobba_ethiopic,ጀ ጀ ጁ ጁ ጂ ጂ ጃ ጃ ጄ ጄ ጅ ጅ ጆ ጆ ጅ‍ ጅ‍ / ,Ǧa ǧa Ǧu ǧu Ǧi ǧi Ǧā ǧā Ǧé ǧé Ǧe ǧe Ǧo ǧo Ǧ ǧ / +argobba_ethiopic,ገ ገ ጉ ጉ ጊ ጊ ጋ ጋ ጌ ጌ ግ ግ ጎ ጎ ግ‍ ግ‍ / ,Ga ga Gu gu Gi gi Gā gā Gé gé Ge ge Go go G g / +argobba_ethiopic,ጠ ጠ ጡ ጡ ጢ ጢ ጣ ጣ ጤ ጤ ጥ ጥ ጦ ጦ ጥ‍ ጥ‍ / ,Ṭa ṭa Ṭu ṭu Ṭi ṭi Ṭā ṭā Ṭé ṭé Ṭe ṭe Ṭo ṭo Ṭ ṭ / +argobba_ethiopic,ጨ ጨ ጩ ጩ ጪ ጪ ጫ ጫ ጬ ጬ ጭ ጭ ጮ ጮ ጭ‍ ጭ‍ / ,Ċa ċa Ċu ċu Ċi ċi Ċā ċā Ċé ċé Ċe ċe Ċo ċo Ċ ċ / +argobba_ethiopic,ጰ ጰ ጱ ጱ ጲ ጲ ጳ ጳ ጴ ጴ ጵ ጵ ጶ ጶ ጵ‍ ጵ‍ / ,P̣a p̣a P̣u p̣u P̣i p̣i P̣ā p̣ā P̣é p̣é P̣e p̣e P̣o p̣o P̣ p̣ / +argobba_ethiopic,ጸ ጸ ጹ ጹ ጺ ጺ ጻ ጻ ጼ ጼ ጽ ጽ ጾ ጾ ጽ‍ ጽ‍ / ,Ṣa ṣa Ṣu ṣu Ṣi ṣi Ṣā ṣā Ṣé ṣé Ṣe ṣe Ṣo ṣo Ṣ ṣ / +argobba_ethiopic,ፀ ፀ ፁ ፁ ፂ ፂ ፃ ፃ ፄ ፄ ፅ ፅ ፆ ፆ ፅ‍ ፅ‍ / ,Ṡa ṡa Ṡu ṡu Ṡi ṡi Ṡā ṡā Ṡé ṡé Ṡe ṡe Ṡo ṡo Ṡ ṡ / +argobba_ethiopic,ፈ ፈ ፉ ፉ ፊ ፊ ፋ ፋ ፌ ፌ ፍ ፍ ፎ ፎ ፍ‍ ፍ‍ / ,Fa fa Fu fu Fi fi Fā Fā Fé fé Fe fe Fo fo F f / +argobba_ethiopic,ፐ ፐ ፑ ፑ ፒ ፒ ፓ ፓ ፔ ፔ ፕ ፕ ፖ ፖ ፕ‍ ፕ‍ / ,Pa pa Pu pu Pi pi Pā pā Pé pé Pe pe Po po P p / +argobba_ethiopic,ቨ ቨ ቩ ቩ ቪ ቪ ቫ ቫ ቬ ቬ ቭ ቭ ቮ ቮ ቭ‍ ቭ‍ / ,Va va Vu vu Vi vi Vā vā Vé vé Ve ve Vo vo V v / +argobba_ethiopic,ቈ ቈ ቊ ቊ ቋ ቋ ቌ ቌ ቍ ቍ / ,Qwa qwa Qwi qwi Qwā qwā Qwé qwé Qwe qwe / +argobba_ethiopic,ኈ ኈ ኊ ኊ ኋ ኋ ኌ ኌ ኍ ኍ / ,H̲wa h̲wa H̲wi h̲wi H̲wā h̲wā H̲wé h̲wé H̲we h̲we / +argobba_ethiopic,ኰ ኰ ኲ ኲ ኳ ኳ ኴ ኴ ኵ ኵ / ,Kwa kwa Kwi kwi Kwā kwā Kwé kwé Kwe kwe / +argobba_ethiopic,ጐ ጐ ጒ ጒ (ጓ ጓ) ጔ ጔ ጕ ጕ /,Gwa gwa Gwi gwi (Gwā gwā) Gwé gwé Gwe gwe / +argobba_ethiopic,ሏ ሏ ቧ ቧ ዟ ዟ (ጧ ጧ) ሟ ሟ ቷ ቷ ዧ ዧ (ጯ ጯ) ሯ ሯ ቿ ቿ ዯ ዯ /,Lwa lwa Bwa bwa Zwa zwa (Ṭwa ṭwa) Mwa mwa Twa twa Žwa žwa (Ċwa ċwa) Rwa rwa Čwa čwa Ywa ywa / +argobba_ethiopic,(ጿ ጿ) ሷ ሷ ኗ ኗ ዷ ዷ (ፏ ፏ) ሿ ሿ ኟ ኟ ጇ ጇ ፘ ፘ ፙ ፙ ፚ ፚ / ,(Ṣwa ṣwa) Swa swa Nwa nwa Dwa dwa (Fwa fwa) 
Šwa šwa Ñwa ñwa Ǧwa ǧwa Rya rya Mya mya Fya fya / +argobba_ethiopic,፩ ፪ ፫ ፬ ፭ ፮ ፯ ፰ ፱ ፲ ፳ ፴ ፵ ፶ ፷ ፸ ፹ ፺ ፻ ፼,1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100 10000 diff --git a/test/data/script_samples/armenian.csv b/test/data/script_samples/armenian.csv new file mode 100644 index 0000000..b53d20e --- /dev/null +++ b/test/data/script_samples/armenian.csv @@ -0,0 +1,2 @@ +armenian,Ա ա Բ բ Ղ Ղ ղ Գ գ Ձ Ձ ձ Դ դ Է է Ը ը Եւ Եւ եւ Ե'V Ե'վ ե'վ և Ե ե Թ թ Ժ Ժ ժ Զ զ Ի ի L լ Խ Խ խ Ց Ց ց Ծ Ծ ծ Ք ք Կ կ Չ Չ չ Ճ Ճ ճ Մ մ Յ յ Ն ն Շ Շ շ Հ հ Օ օ Ո ո Փ փ Պ պ Ջ ջ Ռ ռ Ս ս V վ Տ տ Ր ր Ւ ւ Ու ու Ֆ ֆ ﬓ ﬔ ﬕ ﬖ ﬗ,A a B b GH Gh gh G g DZ Dz dz D d Ē ē Ě ě EW Ew ew E'V E'v e'v ev E e Tʻ tʻ ZH Zh zh Z z I i L l KH Kh kh TSʻ Tsʻ tsʻ TS Ts ts Kʻ kʻ K k CHʻ Chʻ chʻ CH Ch ch M m Y y N n SH Sh sh H h Ō ō O o Pʻ pʻ P p J j Ṛ ṛ S s V v T t R r W w U u F f +armenian,ՄՆ Մն մն ﬓ ﬓ ՄԵ Մե մե ﬔ ﬔ ՄԻ Մի մի ﬕ ﬕ ՎՆ Վն վն ﬖ ﬖ ՄԽ ՄԽ Մխ մխ ﬗ ﬗ ﬗ, MN Mn mn m︠n︡ m͡n ME Me me m︠e︡ m͡e MI Mi mi m︠i︡ m͡i VN Vn vn v︠n︡ v͡n MKH MKh Mkh mkh m︠k︡h m︠kh︡ m͡kh diff --git a/test/data/script_samples/assamese.csv b/test/data/script_samples/assamese.csv new file mode 100644 index 0000000..8e25096 --- /dev/null +++ b/test/data/script_samples/assamese.csv @@ -0,0 +1,4 @@ +assamese,কা খা গা ঘা ঙা চা ছা জা ঝা ঞা টা ঠা ডা ড়া ঢা ঢ়া ণা তা থা দা ধা না পা ফা বা ভা মা যা যা রা লা শা ষা সা হা কাঃ কাঁ ৎ ,kā khā gā ghā ṅā cā chā jā jhā ñā ṭā ṭhā ḍā ṛā ḍhā ṛhā ṇā tā thā dā dhā nā pā phā bā bhā mā yā ẏā rā lā śā shā sā hā kāḥ kām̐ t̲ +assamese,আকবা আখবা আগবা আঘবা আঙবা আচবা আছবা আজবা আঝবা আঞবা আটবা আঠবা আডবা আড়বা আঢবা আঢ়বা আণবা আতবা আথবা আদবা আধবা আনবা আপব আফবা আববা আভবা আমবা আযবা আযবা আরবা আলবা আশবা আষবা আসবা আহবা আঃ ।,ākvā ākhvā āgvā āghvā āṅvā ācvā āchvā ājvā ājhvā āñvā āṭvā āṭhvā āḍvā āṛvā āḍhvā āṛhvā āṇvā ātvā āthvā ādvā ādhvā ānvā āpva āphvā ābbā ābhvā āmvā āyvā āẏvā ārvā ālvā āśvā āshvā āsvā āhvā āḥ / +assamese,আঁকা আঁখা আঁগা আঁঘা আঁঙা আঁচা আঁছা আঁজা আঁঝা আঁঞা আঁটা আঁঠা আঁডা আঁড়া আঁঢা আঁঢ়া অঁণ আঁতা আঁথা আঁদা আঁধা অঁন আঁনা ঐঁনৈ ঔঁনৌ 
ঈঁনী ইঁনি ঊঁনূ উঁনু ঋঁনৃ ৠঁনৄ ঌঁনৢ ৡঁনৣ ওঁনো এঁনে আঁপা আঁফা আঁবা আঁভা আঁমা আঁযা আঁয়া আঁরা আঁলা আঁবা আঁশা আঁষা আঁসা আঁহা ॥ ,ān̐kā ān̐khā ān̐gā ān̐ghā ān̐ṅā ān̐cā ān̐chā ān̐jā ān̐jhā ān̐ñā ān̐ṭā ān̐ṭhā ān̐ḍā ān̐ṛā ān̐ḍhā ān̐ṛhā an̐ṇa ān̐tā ān̐thā ān̐dā ān̐dhā an̐na ān̐nā ain̐nai aun̐nau īn̐nī in̐ni ūn̐nū un̐nu r̥n̐nr̥ r̥̄n̐nr̥̄ l̥n̐nl̥ l̥̄n̐nl̥̄ on̐no en̐ne ām̐pā ām̐phā ām̐bā ām̐bhā ām̐mā ām̐yā ām̐ẏā ām̐rā ām̐lā ām̐śā ām̐shā ām̐sā ām̐hā // +assamese,ক কা কি কী কু কূ কৃ কৄ কৢ কৣ কৢ কে কৈ কো কৌ ০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ৽ ঽ ঽঽ,anna ka kā ki kī ku kū kr̥ kr̥̄ kl̥ kl̥̄ kl̥ ke kai ko kau 0 1 2 3 4 5 6 7 8 9 * ' '' diff --git a/test/data/script_samples/avaric_cyrillic.csv b/test/data/script_samples/avaric_cyrillic.csv new file mode 100644 index 0000000..66ce43c --- /dev/null +++ b/test/data/script_samples/avaric_cyrillic.csv @@ -0,0 +1,2 @@ +avaric_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Ӏ ӏ Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ӏ ӏ, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g Ḣ ḣ I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ḣ ḣ +avaric_cyrillic,Ӏ ӏ,Ḣ ḣ diff --git a/test/data/script_samples/awadhi_devanagari.csv b/test/data/script_samples/awadhi_devanagari.csv new file mode 100644 index 0000000..444f5ee --- /dev/null +++ b/test/data/script_samples/awadhi_devanagari.csv @@ -0,0 +1,8 @@ +awadhi_devanagari,क क़ ख ख़ ग ग़ घ घ़ ॻ ङ च छ ज ज़ झ ञ ॹ ॼ ट ट़ ठ ड ड़ ढ ढ़ ॾ त थ द ध न ऩ प फ फ़ ब भ म य र ल व श ष स स़ ह ह़ नं नँ नः ऽ,ka qa kha k̲h̲a ga g̲h̲a gha g̳h̳a ġa ṅa ca cha ja za jha ña z̤a j̈a ṭa t̤a ṭha ḍa ṛa ḍha ṛha d̤a ta tha da dha na n̤a pa pha fa ba bha ma ya ra la va śa sha sa s̤a ha h̤a naṃ nam̐ naḥ ' +awadhi_devanagari,अ ॲ आ ऄ ऐ ꣾ औ ॵ ए ऎ ऍ इ ई ओ ऒ ऑ ॳ ॴ उ ऊ ॶ ॷ ऌ ॡ ऋ ॠ,a â ā ă ai ăi au ău e ĕ ê i ī o ŏ ô ȯ ö u ū u̇ ü ḷ ḹ ṛ ṝ +awadhi_devanagari,क 
का कै कौ कॏ के कॆ कॅ कि की को कॊ कॉ कऺ कऻ कु कू कॖ कॗ कॢ कॣ कृ कॄ,ka kā kai kau kău ke kĕ kê ki kī ko kŏ kô kȯ kö ku kū ku̇ kü kl̥ kl̥̄ kr̥ kr̥̄ +awadhi_devanagari,अंक अंक़ अंख अंख़ अंग अंग़ अंघ अंघ़ अंङ अंच अंछ अंज अंज़ अंझ अंञ अंट अंट़ अंठ अंड अंड़ अंढ अंढ़ अंण अंत अंथ अंद अंध अंन अंप अंफ अंफ़ अंब अंभ अंम अंय अंर अंल अंव अंश अंष अंस अंस़ अंह अंह़,aṅka aṅqa aṅkha aṅk̲h̲a aṅga aṅg̲h̲a aṅgha aṅg̳h̳a aṅṅa añca añcha añja añza añjha añña aṇṭa aṇt̤a aṇṭha aṇḍa aṇṛa aṇḍha aṇṛha aṇṇa anta antha anda andha anna ampa ampha amfa amba ambha amma aṃya aṃra aṃla aṃva aṃśa aṃsha aṃsa aṃs̤a aṃha aṃh̤a +awadhi_devanagari,अँक अँक़ अँख अँख़ अँग अँग़ अँघ अँघ़ अँङ अँच अँछ अँज अँज़ अँझ अँञ अँट अँट़ अँठ अँड अँड़ अँढ अँढ़ अँण अँत अँथ अँद अँध अँन अँप अँफ अँफ़ अँब अँभ अँम अँय अँर अँल अँव अँश अँष अँस अँस़ अँह अँह़ अँ,an̐ka an̐qa an̐kha an̐k̲h̲a an̐ga an̐g̲h̲a an̐gha an̐g̳h̳a an̐ṅa an̐ca an̐cha an̐ja an̐za an̐jha an̐ña an̐ṭa an̐t̤a an̐ṭha an̐ḍa an̐ṛa an̐ḍha an̐ṛha an̐ṇa an̐ta an̐tha an̐da an̐dha an̐na am̐pa am̐pha am̐fa am̐ba am̐bha am̐ma am̐ya am̐ra am̐la am̐va am̐śa am̐sha am̐sa am̐s̤a am̐ha am̐h̤a am̐ +awadhi_devanagari,० १ २ ३ ४ ५ ६ ७ ८ ९ । ॥ ऽ,0 1 2 3 4 5 6 7 8 9 / // ' +awadhi_devanagari,at head of title,at head of title +awadhi_devanagari,MMXXVI MCMXCIX,MMXXVI MCMXCIX diff --git a/test/data/script_samples/azerbaijani_cyrillic.csv b/test/data/script_samples/azerbaijani_cyrillic.csv new file mode 100644 index 0000000..918f298 --- /dev/null +++ b/test/data/script_samples/azerbaijani_cyrillic.csv @@ -0,0 +1,2 @@ +azerbaijani_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ә ә Ғ Ғ ғ Ѓ ѓ Ј ј Һ һ Ө ө Ү ү Ҹ ҹ, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ă ă GH Gh gh Ǵ ǵ I̐ i̐ Ḣ ḣ Ȯ ȯ U̇ 
u̇ J j +azerbaijani_cyrillic,Ә ә Ғ Ғ ғ Ѓ ѓ Ј ј Һ һ Ө ө Ү ү Ҹ ҹ,Ă ă GH Gh gh Ǵ ǵ I̐ i̐ Ḣ ḣ Ȯ ȯ U̇ u̇ J j diff --git a/test/data/script_samples/balkar_cyrillic.csv b/test/data/script_samples/balkar_cyrillic.csv new file mode 100644 index 0000000..ff93e53 --- /dev/null +++ b/test/data/script_samples/balkar_cyrillic.csv @@ -0,0 +1 @@ +balkar_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z diff --git a/test/data/script_samples/bashkir_cyrillic.csv b/test/data/script_samples/bashkir_cyrillic.csv new file mode 100644 index 0000000..7cd16d0 --- /dev/null +++ b/test/data/script_samples/bashkir_cyrillic.csv @@ -0,0 +1,2 @@ +balkar_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ң Ң Ң Ң ң ң Ҙ Ҙ Ҙ ҙ ҙ ҙ Ә ә Ғ Ғ ғ Һ һ Ө ө Ү ү Ҫ Ҫ ҫ Ҹ ҹ Ҡ ҡ, A a B b CH Ch ch D d Ė ė Ë ë Ę ę E e Ḟ ḟ F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g T︠H︡ T͡H T︠h︡ t͡h t︠h︡ t͡h Ă ă GH Gh gh Ḣ ḣ Ȯ ȯ U̇ u̇ TH Th th J j Q q +balkar_cyrillic,Ң Ң Ң Ң ң ң Ҙ Ҙ Ҙ ҙ ҙ ҙ Ә ә Ғ Ғ ғ Һ һ Ө ө Ү ү Ҫ Ҫ ҫ Ҹ ҹ Ҡ ҡ,N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g T︠H︡ T͡H T︠h︡ t͡h t︠h︡ t͡h Ă ă GH Gh gh Ḣ ḣ Ȯ ȯ U̇ u̇ TH Th th J j Q q diff --git a/test/data/script_samples/belarusian.csv b/test/data/script_samples/belarusian.csv new file mode 100644 index 0000000..99fdbb0 --- /dev/null +++ b/test/data/script_samples/belarusian.csv @@ -0,0 +1,2 @@ +belarusian,А а Б б V в Г г Ґ ґ Д д Е е Ё Ё Ё Ё ё ё Ж Ж Ж Ж ж ж З 
з И и І і Ї ї Й й К к L л М м Н н О о П п Р р,A a B b V v H h G g D d E e I︠O︡ I͡O I︠o︡ I͡o i︠o︡ i͡o Z︠H︡ Z͡H Z︠h︡ Z͡h z︠h︡ z͡h Z z Ī ī I i Ï ï Ĭ ĭ K k L l M m N n O o P p R r +belarusian,С с Т т У у Ў ў Ф ф Х Х х Ц Ц ц Ч Ч ч Ш Ш ш ШЧ Шч шч Ъ ъ Ы ы Ь ь Ѣ ѣ Э э Ю Ю Ю ю ю Я Я Я Я я,S s T t U u Ŭ ŭ F f KH Kh kh TS Ts ts CH Ch ch SH Sh sh SHCH Shch shch ʺ̳ ʺ Y y ʹ̳ ʹ Ě ě Ė ė I︠U︡ I͡U I︠u︡ i︠u︡ i͡u I︠A︡ I͡A I︠a︡ I͡a i͡a diff --git a/test/data/script_samples/bengali.csv b/test/data/script_samples/bengali.csv new file mode 100644 index 0000000..b749afd --- /dev/null +++ b/test/data/script_samples/bengali.csv @@ -0,0 +1,6 @@ +bengali,কা খা গা ঘা ঙা চা ছা জা ঝা ঞা টা ঠা ডা ড়া ঢা ঢ়া ণা তা থা দা ধা না পা ফা বা ভা মা যা যা রা লা শা ষা সা হা কাঃ কাঁ ৎ ,kā khā gā ghā ṅā cā chā jā jhā ñā ṭā ṭhā ḍā ṛā ḍhā ṛhā ṇā tā thā dā dhā nā pā phā bā bhā mā yā ẏā rā lā śā shā sā hā kāḥ kām̐ t̲ +bengali,আক্বা আখ্বা আগ্বা আঘ্বা আঙ্বা আচ্বা আছ্বা আজ্বা আঝ্বা আঞ্বা আট্বা আঠ্বা আড্বা আড়্বা আঢ্বা আঢ়্বা আণ্বা আত্বা আথ্বা আদ্বা আধ্বা আন্বা,ākvā ākhvā āgvā āghvā āṅvā ācvā āchvā ājvā ājhvā āñvā āṭvā āṭhvā āḍvā āṛvā āḍhvā āṛhvā āṇvā ātvā āthvā ādvā ādhvā ānvā +bengali,আপ্ব আফ্বা আব্বা আভ্বা আম্বা আয্বা আয়্বা আর্বা আল্বা আশ্বা আষ্বা আস্বা আহ্বা আঃ ।,āpva āphvā ābbā ābhvā āmvā āyvā āẏvā ārvā ālvā āśvā āshvā āsvā āhvā āḥ / +bengali,আঁকা আঁখা আঁগা আঁঘা আন্̐ঙা আঁচা আঁছা আঁজা আঁঝা আন্̐ঞা আঁটা আঁঠা আঁডা আঁড়া আঁঢা আঁঢ়া অন্̐ণ আঁতা আঁথা আঁদা আঁধা,ān̐kā ān̐khā ān̐gā ān̐ghā ān̐ṅā ān̐cā ān̐chā ān̐jā ān̐jhā ān̐ñā ān̐ṭā ān̐ṭhā ān̐ḍā ān̐ṛā ān̐ḍhā ān̐ṛhā an̐ṇa ān̐tā ān̐thā ān̐dā ān̐dhā +bengali,অন্̐ন আন্̐না ঐন্̐নৈ ঔন্̐নৌ ঈন্̐নী ইন্̐নি ঊন্̐নূ উন্̐নু ঋন্̐নৃ ৠন্̐নৄ ঌন্̐নৢ ৡন্̐নৣ ওন্̐নো এন্̐নে আঁপা আঁফা আঁবা আঁভা আঁমা আঁযা আঁয়া আঁরা আঁলা আঁশা আঁষা আঁসা আঁহা ॥,an̐na ān̐nā ain̐nai aun̐nau īn̐nī in̐ni ūn̐nū un̐nu r̥n̐nr̥ r̥̄n̐nr̥̄ l̥n̐nl̥ l̥̄n̐nl̥̄ on̐no en̐ne ām̐pā ām̐phā ām̐bā ām̐bhā ām̐mā ām̐yā ām̐ẏā ām̐rā ām̐lā ām̐śā ām̐shā ām̐sā ām̐hā // +bengali,ক কা কি কী কু কূ কৃ কৄ কৢ কৣ কৢ কে কৈ কো কৌ ০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ৽ ঽ 
ঽঽ,anna ka kā ki kī ku kū kr̥ kr̥̄ kl̥ kl̥̄ kl̥ ke kai ko kau 0 1 2 3 4 5 6 7 8 9 * ' '' diff --git a/test/data/script_samples/bihari_devanagari.csv b/test/data/script_samples/bihari_devanagari.csv new file mode 100644 index 0000000..835734e --- /dev/null +++ b/test/data/script_samples/bihari_devanagari.csv @@ -0,0 +1,8 @@ +bihari_devanagari,क क़ ख ख़ ग ग़ घ घ़ ॻ ङ च छ ज ज़ झ ञ ॹ ॼ ट ट़ ठ ड ड़ ढ ढ़ ॾ त थ द ध न ऩ प फ फ़ ब भ म य र ल व श ष स स़ ह ह़ नं नँ नः ऽ,ka qa kha k̲h̲a ga g̲h̲a gha g̳h̳a ġa ṅa ca cha ja za jha ña z̤a j̈a ṭa t̤a ṭha ḍa ṛa ḍha ṛha d̤a ta tha da dha na n̤a pa pha fa ba bha ma ya ra la va śa sha sa s̤a ha h̤a naṃ nam̐ naḥ ' +bihari_devanagari,अ ॲ आ ऄ ऐ ꣾ औ ॵ ए ऎ ऍ इ ई ओ ऒ ऑ ॳ ॴ उ ऊ ॶ ॷ ऌ ॡ ऋ ॠ,a â ā ă ai ăi au ău e ĕ ê i ī o ŏ ô ȯ ö u ū u̇ ü ḷ ḹ ṛ ṝ +bihari_devanagari,क का कै कौ कॏ के कॆ कॅ कि की को कॊ कॉ कऺ कऻ कु कू कॖ कॗ कॢ कॣ कृ कॄ,ka kā kai kau kău ke kĕ kê ki kī ko kŏ kô kȯ kö ku kū ku̇ kü kl̥ kl̥̄ kr̥ kr̥̄ +bihari_devanagari,अंक अंक़ अंख अंख़ अंग अंग़ अंघ अंघ़ अंङ अंच अंछ अंज अंज़ अंझ अंञ अंट अंट़ अंठ अंड अंड़ अंढ अंढ़ अंण अंत अंथ अंद अंध अंन अंप अंफ अंफ़ अंब अंभ अंम अंय अंर अंल अंव अंश अंष अंस अंस़ अंह अंह़,aṅka aṅqa aṅkha aṅk̲h̲a aṅga aṅg̲h̲a aṅgha aṅg̳h̳a aṅṅa añca añcha añja añza añjha añña aṇṭa aṇt̤a aṇṭha aṇḍa aṇṛa aṇḍha aṇṛha aṇṇa anta antha anda andha anna ampa ampha amfa amba ambha amma aṃya aṃra aṃla aṃva aṃśa aṃsha aṃsa aṃs̤a aṃha aṃh̤a +bihari_devanagari,अँक अँक़ अँख अँख़ अँग अँग़ अँघ अँघ़ अँङ अँच अँछ अँज अँज़ अँझ अँञ अँट अँट़ अँठ अँड अँड़ अँढ अँढ़ अँण अँत अँथ अँद अँध अँन अँप अँफ अँफ़ अँब अँभ अँम अँय अँर अँल अँव अँश अँष अँस अँस़ अँह अँह़ अँ,an̐ka an̐qa an̐kha an̐k̲h̲a an̐ga an̐g̲h̲a an̐gha an̐g̳h̳a an̐ṅa an̐ca an̐cha an̐ja an̐za an̐jha an̐ña an̐ṭa an̐t̤a an̐ṭha an̐ḍa an̐ṛa an̐ḍha an̐ṛha an̐ṇa an̐ta an̐tha an̐da an̐dha an̐na am̐pa am̐pha am̐fa am̐ba am̐bha am̐ma am̐ya am̐ra am̐la am̐va am̐śa am̐sha am̐sa am̐s̤a am̐ha am̐h̤a am̐ +bihari_devanagari,० १ २ ३ ४ ५ ६ ७ ८ ९ । ॥ ऽ,0 1 2 3 4 5 6 7 8 9 / // ' +bihari_devanagari,at head of 
title,at head of title +bihari_devanagari,MMXXVI MCMXCIX,MMXXVI MCMXCIX diff --git a/test/data/script_samples/bodo_bengali.csv b/test/data/script_samples/bodo_bengali.csv new file mode 100644 index 0000000..970ccf0 --- /dev/null +++ b/test/data/script_samples/bodo_bengali.csv @@ -0,0 +1,6 @@ +bodo_bengali,কা খা গা ঘা ঙা চা ছা জা ঝা ঞা টা ঠা ডা ড়া ঢা ঢ়া ণা তা থা দা ধা না পা ফা বা ভা মা যা যা রা লা শা ষা সা হা কাঃ কাঁ ৎ ,kā khā gā ghā ṅā cā chā jā jhā ñā ṭā ṭhā ḍā ṛā ḍhā ṛhā ṇā tā thā dā dhā nā pā phā bā bhā mā yā ẏā rā lā śā shā sā hā kāḥ kām̐ t̲ +bodo_bengali,আক্বা আখ্বা আগ্বা আঘ্বা আঙ্বা আচ্বা আছ্বা আজ্বা আঝ্বা আঞ্বা আট্বা আঠ্বা আড্বা আড়্বা আঢ্বা আঢ়্বা আণ্বা আত্বা আথ্বা আদ্বা আধ্বা আন্বা,ākvā ākhvā āgvā āghvā āṅvā ācvā āchvā ājvā ājhvā āñvā āṭvā āṭhvā āḍvā āṛvā āḍhvā āṛhvā āṇvā ātvā āthvā ādvā ādhvā ānvā +bodo_bengali,আপ্ব আফ্বা আব্বা আভ্বা আম্বা আয্বা আয়্বা আর্বা আল্বা আশ্বা আষ্বা আস্বা আহ্বা আঃ ।,āpva āphvā ābbā ābhvā āmvā āyvā āẏvā ārvā ālvā āśvā āshvā āsvā āhvā āḥ / +bodo_bengali,আঁকা আঁখা আঁগা আঁঘা আন্̐ঙা আঁচা আঁছা আঁজা আঁঝা আন্̐ঞা আঁটা আঁঠা আঁডা আঁড়া আঁঢা আঁঢ়া অন্̐ণ আঁতা আঁথা আঁদা আঁধা,ān̐kā ān̐khā ān̐gā ān̐ghā ān̐ṅā ān̐cā ān̐chā ān̐jā ān̐jhā ān̐ñā ān̐ṭā ān̐ṭhā ān̐ḍā ān̐ṛā ān̐ḍhā ān̐ṛhā an̐ṇa ān̐tā ān̐thā ān̐dā ān̐dhā +bodo_bengali,অন্̐ন আন্̐না ঐন্̐নৈ ঔন্̐নৌ ঈন্̐নী ইন্̐নি ঊন্̐নূ উন্̐নু ঋন্̐নৃ ৠন্̐নৄ ঌন্̐নৢ ৡন্̐নৣ ওন্̐নো এন্̐নে আঁপা আঁফা আঁবা আঁভা আঁমা আঁযা আঁয়া আঁরা আঁলা আঁশা আঁষা আঁসা আঁহা ॥,an̐na ān̐nā ain̐nai aun̐nau īn̐nī in̐ni ūn̐nū un̐nu r̥n̐nr̥ r̥̄n̐nr̥̄ l̥n̐nl̥ l̥̄n̐nl̥̄ on̐no en̐ne ām̐pā ām̐phā ām̐bā ām̐bhā ām̐mā ām̐yā ām̐ẏā ām̐rā ām̐lā ām̐śā ām̐shā ām̐sā ām̐hā // +bodo_bengali,ক কা কি কী কু কূ কৃ কৄ কৢ কৣ কৢ কে কৈ কো কৌ ০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ৽ ঽ ঽঽ,anna ka kā ki kī ku kū kr̥ kr̥̄ kl̥ kl̥̄ kl̥ ke kai ko kau 0 1 2 3 4 5 6 7 8 9 * ' '' diff --git a/test/data/script_samples/bodo_devanagari.csv b/test/data/script_samples/bodo_devanagari.csv new file mode 100644 index 0000000..1511588 --- /dev/null +++ b/test/data/script_samples/bodo_devanagari.csv 
@@ -0,0 +1,7 @@ +bodo_devanagari,क क़ ख ख़ ग ग़ घ घ़ ॻ ङ च छ ज ज़ झ ञ ॹ ॼ ट ट़ ठ ड ड़ ढ ढ़ ॾ त थ द ध न ऩ प फ फ़ ब भ म य र ल व श ष स स़ ह ह़ नं नँ नः ऽ,ka qa kha k̲h̲a ga g̲h̲a gha g̳h̳a ġa ṅa ca cha ja za jha ña z̤a j̈a ṭa t̤a ṭha ḍa ṛa ḍha ṛha d̤a ta tha da dha na n̤a pa pha fa ba bha ma ya ra la va śa sha sa s̤a ha h̤a naṃ nam̐ naḥ ' +bodo_devanagari,अ ॲ आ ऄ ऐ ꣾ औ ॵ ए ऎ ऍ इ ई ओ ऒ ऑ ॳ ॴ उ ऊ ॶ ॷ ऌ ॡ ऋ ॠ,a â ā ă ai ăi au ău e ĕ ê i ī o ŏ ô ȯ ö u ū u̇ ü ḷ ḹ ṛ ṝ +bodo_devanagari,क का कै कौ कॏ के कॆ कॅ कि की को कॊ कॉ कऺ कऻ कु कू कॖ कॗ कॢ कॣ कृ कॄ,ka kā kai kau kău ke kĕ kê ki kī ko kŏ kô kȯ kö ku kū ku̇ kü kl̥ kl̥̄ kr̥ kr̥̄ +bodo_devanagari,अंक अंक़ अंख अंख़ अंग अंग़ अंघ अंघ़ अंङ अंच अंछ अंज अंज़ अंझ अंञ अंट अंट़ अंठ अंड अंड़ अंढ अंढ़ अंण अंत अंथ अंद अंध अंन अंप अंफ अंफ़ अंब अंभ अंम अंय अंर अंल अंव अंश अंष अंस अंस़ अंह अंह़,aṅka aṅqa aṅkha aṅk̲h̲a aṅga aṅg̲h̲a aṅgha aṅg̳h̳a aṅṅa añca añcha añja añza añjha añña aṇṭa aṇt̤a aṇṭha aṇḍa aṇṛa aṇḍha aṇṛha aṇṇa anta antha anda andha anna ampa ampha amfa amba ambha amma aṃya aṃra aṃla aṃva aṃśa aṃsha aṃsa aṃs̤a aṃha aṃh̤a +bodo_devanagari,अँक अँक़ अँख अँख़ अँग अँग़ अँघ अँघ़ अँङ अँच अँछ अँज अँज़ अँझ अँञ अँट अँट़ अँठ अँड अँड़ अँढ अँढ़ अँण अँत अँथ अँद अँध अँन अँप अँफ अँफ़ अँब अँभ अँम अँय अँर अँल अँव अँश अँष अँस अँस़ अँह अँह़ अँ,an̐ka an̐qa an̐kha an̐k̲h̲a an̐ga an̐g̲h̲a an̐gha an̐g̳h̳a an̐ṅa an̐ca an̐cha an̐ja an̐za an̐jha an̐ña an̐ṭa an̐t̤a an̐ṭha an̐ḍa an̐ṛa an̐ḍha an̐ṛha an̐ṇa an̐ta an̐tha an̐da an̐dha an̐na am̐pa am̐pha am̐fa am̐ba am̐bha am̐ma am̐ya am̐ra am̐la am̐va am̐śa am̐sha am̐sa am̐s̤a am̐ha am̐h̤a am̐ +bodo_devanagari,० १ २ ३ ४ ५ ६ ७ ८ ९ । ॥ ऽ,0 1 2 3 4 5 6 7 8 9 / // ' +bodo_devanagari,at head of title,at head of title diff --git a/test/data/script_samples/braj_devanagari.csv b/test/data/script_samples/braj_devanagari.csv new file mode 100644 index 0000000..f364cad --- /dev/null +++ b/test/data/script_samples/braj_devanagari.csv @@ -0,0 +1,6 @@ +braj_devanagari,क क़ ख ख़ ग ग़ घ घ़ ॻ ङ च छ ज ज़ झ ञ ॹ ॼ ट ट़ ठ ड ड़ ढ 
ढ़ ॾ त थ द ध न ऩ प फ फ़ ब भ म य र ल व श ष स स़ ह ह़ नं नँ नः ऽ,ka qa kha k̲h̲a ga g̲h̲a gha g̳h̳a ġa ṅa ca cha ja za jha ña z̤a j̈a ṭa t̤a ṭha ḍa ṛa ḍha ṛha d̤a ta tha da dha na n̤a pa pha fa ba bha ma ya ra la va śa sha sa s̤a ha h̤a naṃ nam̐ naḥ ' +braj_devanagari,अ ॲ आ ऄ ऐ ꣾ औ ॵ ए ऎ ऍ इ ई ओ ऒ ऑ ॳ ॴ उ ऊ ॶ ॷ ऌ ॡ ऋ ॠ,a â ā ă ai ăi au ău e ĕ ê i ī o ŏ ô ȯ ö u ū u̇ ü ḷ ḹ ṛ ṝ +braj_devanagari,क का कै कौ कॏ के कॆ कॅ कि की को कॊ कॉ कऺ कऻ कु कू कॖ कॗ कॢ कॣ कृ कॄ,ka kā kai kau kău ke kĕ kê ki kī ko kŏ kô kȯ kö ku kū ku̇ kü kl̥ kl̥̄ kr̥ kr̥̄ +braj_devanagari,अंक अंक़ अंख अंख़ अंग अंग़ अंघ अंघ़ अंङ अंच अंछ अंज अंज़ अंझ अंञ अंट अंट़ अंठ अंड अंड़ अंढ अंढ़ अंण अंत अंथ अंद अंध अंन अंप अंफ अंफ़ अंब अंभ अंम अंय अंर अंल अंव अंश अंष अंस अंस़ अंह अंह़,aṅka aṅqa aṅkha aṅk̲h̲a aṅga aṅg̲h̲a aṅgha aṅg̳h̳a aṅṅa añca añcha añja añza añjha añña aṇṭa aṇt̤a aṇṭha aṇḍa aṇṛa aṇḍha aṇṛha aṇṇa anta antha anda andha anna ampa ampha amfa amba ambha amma aṃya aṃra aṃla aṃva aṃśa aṃsha aṃsa aṃs̤a aṃha aṃh̤a +braj_devanagari,अँक अँक़ अँख अँख़ अँग अँग़ अँघ अँघ़ अँङ अँच अँछ अँज अँज़ अँझ अँञ अँट अँट़ अँठ अँड अँड़ अँढ अँढ़ अँण अँत अँथ अँद अँध अँन अँप अँफ अँफ़ अँब अँभ अँम अँय अँर अँल अँव अँश अँष अँस अँस़ अँह अँह़ अँ,an̐ka an̐qa an̐kha an̐k̲h̲a an̐ga an̐g̲h̲a an̐gha an̐g̳h̳a an̐ṅa an̐ca an̐cha an̐ja an̐za an̐jha an̐ña an̐ṭa an̐t̤a an̐ṭha an̐ḍa an̐ṛa an̐ḍha an̐ṛha an̐ṇa an̐ta an̐tha an̐da an̐dha an̐na am̐pa am̐pha am̐fa am̐ba am̐bha am̐ma am̐ya am̐ra am̐la am̐va am̐śa am̐sha am̐sa am̐s̤a am̐ha am̐h̤a am̐ +braj_devanagari,० १ २ ३ ४ ५ ६ ७ ८ ९ । ॥ ऽ,0 1 2 3 4 5 6 7 8 9 / // ' diff --git a/test/data/script_samples/bulgarian.csv b/test/data/script_samples/bulgarian.csv new file mode 100644 index 0000000..be104a5 --- /dev/null +++ b/test/data/script_samples/bulgarian.csv @@ -0,0 +1,2 @@ +bulgarian,А а Б б V в Г г Д д Е е Ж Ж ж З з И и Й й К к L л М м Н н О о П п Р р С с Т т У у Ф ф Х Х х ,A a B b V v G g D d E e ZH Zh zh Z z I i Ĭ ĭ K k L l M m N n O o P p R r S s T t U u F f KH Kh kh +bulgarian,Ц Ц Ц Ц ц ц Ч Ч ч 
Ш Ш ш Щ Шт Щ щ Ъ ъ Ь ь Ѣ Ѣ Ѣ Ѣ ѣ ѣ Ю Ю Ю Ю ю ю Я Я Я Я я я я Ѫ ѫ,T︠S︡ T͡S T︠s︡ T͡s t︠s︡ t͡s CH Ch ch SH Sh sh SHT SHt Sht sht Ŭ ŭ ʹ̳ ʹ I︠E︡ I͡E I︠e︡ I͡e i︠e︡ i͡e I︠U︡ I͡U I︠u︡ I͡u i︠u︡ i͡u I︠A︡ I͡A I︠a︡ I͡a i︠a︡ i͡a i͡a U̐ u̐ diff --git a/test/data/script_samples/buriat.csv b/test/data/script_samples/buriat.csv new file mode 100644 index 0000000..742ba4f --- /dev/null +++ b/test/data/script_samples/buriat.csv @@ -0,0 +1,2 @@ +buriat_cyrillic,А а Б б V в Г г Д д Е е Ё ё Ж Ж ж З з И и Й й К к L л М м Н н О о П п Р р С с Т т У у Ф ф Х Х х Ц Ц Ц Ц ц ц Ч Ч ч Ш Ш ш Щ Щ щ Ъ ъ Ы ы Ь ь Э э Ю Ю Ю Ю ю ю Я Я Я Я я я я Ө ө Ү ү Һ һ,A a B b V v G g D d E e Ë ë ZH Zh zh Z z I i Ĭ ĭ K k L l M m N n O o P p R r S s T t U u F f KH Kh kh T︠S︡ T͡S T︠s︡ T͡s t︠s︡ t͡s CH Ch ch SH Sh sh SHCH ShCh shch ʺ̳ ʺ Y y ʹ̳ ʹ Ė ė I︠U︡ I͡U I︠u︡ I͡u i︠u︡ i͡u I︠A︡ I͡A I︠a︡ I͡a i︠a︡ i͡a i͡a Ȯ ȯ U̇ u̇ Ḣ ḣ +buriat_cyrillic,Ө ө Ү ү Һ һ,Ȯ ȯ U̇ u̇ Ḣ ḣ diff --git a/test/data/script_samples/buriat_cyrillic.csv b/test/data/script_samples/buriat_cyrillic.csv new file mode 100644 index 0000000..742ba4f --- /dev/null +++ b/test/data/script_samples/buriat_cyrillic.csv @@ -0,0 +1,2 @@ +buriat_cyrillic,А а Б б V в Г г Д д Е е Ё ё Ж Ж ж З з И и Й й К к L л М м Н н О о П п Р р С с Т т У у Ф ф Х Х х Ц Ц Ц Ц ц ц Ч Ч ч Ш Ш ш Щ Щ щ Ъ ъ Ы ы Ь ь Э э Ю Ю Ю Ю ю ю Я Я Я Я я я я Ө ө Ү ү Һ һ,A a B b V v G g D d E e Ë ë ZH Zh zh Z z I i Ĭ ĭ K k L l M m N n O o P p R r S s T t U u F f KH Kh kh T︠S︡ T͡S T︠s︡ T͡s t︠s︡ t͡s CH Ch ch SH Sh sh SHCH ShCh shch ʺ̳ ʺ Y y ʹ̳ ʹ Ė ė I︠U︡ I͡U I︠u︡ I͡u i︠u︡ i͡u I︠A︡ I͡A I︠a︡ I͡a i︠a︡ i͡a i͡a Ȯ ȯ U̇ u̇ Ḣ ḣ +buriat_cyrillic,Ө ө Ү ү Һ һ,Ȯ ȯ U̇ u̇ Ḣ ḣ diff --git a/test/data/script_samples/buriat_mongol_bichig.csv b/test/data/script_samples/buriat_mongol_bichig.csv new file mode 100644 index 0000000..ae23e07 --- /dev/null +++ b/test/data/script_samples/buriat_mongol_bichig.csv @@ -0,0 +1,6 @@ +buriat_mongol_bichig,ᠠ ᠎ᠠ ᠎ᠠ ᠡ ᠎ᠡ ᠎ᠡ ᠧ ᠢ  ᠢ ᠣ ᠤ  ᠤ ᠥ ᠦ  ᠦ ᠨ ᠩ ᠨᠭ ᠬ ᠭ ᠭᠠ ᠭᠣ ᠭᠤ ᠭ ᠭᠡ ᠭᠧ ᠭᠢ ᠭᠥ ᠭᠦ,a -a _a e -e _e 
ė i -I o u -u ȯ u̇ -u̇ n ng nġ q ġ ga go gu g ge gė gi gȯ gu̇ +buriat_mongol_bichig,ᠬᠠ ᠬᠣ ᠬᠤ ᠭ ᠭᠠ ᠭᠣ ᠭᠤ ᠭ ᠭᠡ ᠭᠧ ᠭᠢ ᠭᠥ ᠭᠦ ᠨᠠᠭ ᠨᠡᠭ ᠨᠧᠭ ᠨᠢᠭ ᠨᠣᠭ ᠨᠥᠭ ᠨᠤᠭ ᠨᠦᠭ,qa qo qu ġ ga go gu g ge gė gi gȯ gu̇ naġ neg nėg nig noġ nȯg nuġ nu̇g +buriat_mongol_bichig,ᠪ ᠫ ᠹ ᠰ ᠱ ᠲ ᠼ ᠳ ᠯ ᠮ ᠴ ᠽ ᠵ ᠶ ᠬᠡ ᠬᠧ ᠬᠢ ᠬᠥ ᠬᠦ ᠺᠠ ᠺᠡ ᠺᠢ ᠬᠣ ᠬᠥ ᠬᠤ ᠬᠦ ᠻ ᠷ ᠸ ᠸ ᠾ ᡂ ᡀ ᡁ ᠿ,b p f s ś t tṡ d l m c z j y ke kė ki kȯ ku̇ k̇a k̇e k̇I ko kȯ ku ku̇ kh r v w h ḣ lh zh zr +buriat_mongol_bichig,᠐ ᠑ ᠒ ᠓ ᠔ ᠕ ᠖ ᠗ ᠘ ᠙ /,0 1 2 3 4 5 6 7 8 9 / +buriat_mongol_bichig,ᠮᠣᠩᠭᠣᠯ ᠤᠨ ᠪᠢᠴᠢᠭ ᠦᠨ ᠰᠠᠷ᠎ᠠ ᠶᠢᠨ ᠨᠡᠷ᠎ᠡ ᠶᠢᠨ ᠰᠠᠷ᠎ᠠ ᠨᠡᠷ᠎ᠡ,mongġol-un bicig-u̇n sar-a-yin ner-e-yin sar_a ner_e +buriat_mongol_bichig,ᠪ᠊ ᠨᠠᠰᠤᠨᠪᠠᠲᠤ ᠳ᠋ᠣᠯᠯᠠᠷ ᠮᠠᠲ᠋ᠧᠷᠢᠶᠠᠯ ᠳᠡᠳ᠋ ᠡᠳ᠋ ᠠᠪᠢᠰᠢᠭ᠌ ᠠᠩᠭ᠍ᠯᠢ ᠨᠣᠮ ᠠᠴᠠ᠂ ᠭᠡᠷ ᠡᠴᠡ ᠪ᠊ ᠳ᠊ ᠨᠠ᠊ ᠳᠣᠺᠲ᠋ᠤᠷ,"b. nasunbatu d'ollar mat'ėriyal ded' ed' abisig"" angg`li nom-aca, ger-ece b. d. na. dok̇t'ur" diff --git a/test/data/script_samples/burmese.csv b/test/data/script_samples/burmese.csv index 9ed41b6..74163a5 100644 --- a/test/data/script_samples/burmese.csv +++ b/test/data/script_samples/burmese.csv @@ -1,46 +1,7 @@ -burmese,မဇ္စျိမ,majjhima -burmese,သူများ,sū myāʺ -burmese,ကျွန်တော်,kyvanʻ toʻ -burmese,ကျွန်မ,kyvanʻ ma -burmese,ကျွန်ုပ်,kyvanʻupʻ -burmese,ရှင်,rhaṅʻ -burmese,မြန်မာ,Mranʻ mā -burmese,ကျောင်း,kyoṅʻʺ -burmese,အမေရိကန်,’Amerikanʻ -burmese,အမြွှာ,’a mrvhā -burmese,အစိုးရ,’a cuiʺ ra -burmese,ရာဇဝင်,rājavaṅʻ -burmese,ရာဇိန္ဒြေ,rājindre -burmese,ရာဇဝတ်,rājavatʻ -burmese,ဇီးကွက်,jīʺ kvakʻ -burmese,ဒြပ်နှော,drapʻ nho -burmese,၁၂၃၄၅၀၆၇၈၉,1234506789 -burmese,၂၅၆၈,2568 -burmese,၃၆၆,366 -burmese,၂၀၂၅,2025 -burmese,ဦးဖိုးလှိုင်,Ūʺ Phuiʺ Lhuiṅʻ -burmese,နုနုရည်အင်းဝ,Nu Nu Raññʻ ’Aṅʻʺ va -burmese,ယုဝတီခင်စိန်လှိုင်,Yuvatī Khaṅʻ Cinʻ Lhuiṅʻ -burmese,မြန်မာနိုင်ငံ နှစ်ခြင်း ခရစ်ယာန်အသင်းချုပ်,Mranʻ mā Nuiṅʻ ṅaṃ Nhacʻ khraṅʻʺ Kharacʻyānʻ ’A saṅʻʺ khyupʻ -burmese,သမိုင်းသုတေသနနှင့် အမျိုးသားစာကြည့်တိုက် ဦးစီးဌာန,Samuiṅʻʺ Sutesana nhaṅʻʹ ’A myuiʺ sāʺ Cā kraññʻʹ tuikʻ Ūʺ cīʺ Ṭhāna -burmese,စာပေဗိမာန်,Cā pe Bimānʻ -burmese,ညှောင့်,ññhoṅʻʹ 
-burmese,ဈာန်ကြွ,jhānʻ krva -burmese,ခညောင်း,kha ññoṅʻʺ -burmese,ထွက်,thvakʻ -burmese,ကြိုက်,kruikʻ -burmese,ခေါ်,khoʻ -burmese,သောက်,sokʻ -burmese,မွေးစား,mveʺ cāʺ -burmese,ဆွဲ,chvai -burmese,ဖြုတ်,phrutʻ -burmese,မန္တလေး,Mantaleʺ -burmese,ရန်ကုန်,Ranʻ kunʻ -burmese,နေပြည်တော်,Ne Praññʻ Toʻ -burmese,မော်လမြိုင်,Moʻ la mruiṅʻ -burmese,တောင်ကြီး,Toṅʻ krīʺ -burmese,စုစု,susu -burmese,မြန်မာနိုင်ငံတွင်ပြောင်းလဲကျင့်သုံးမည့် အချိုးကျကိုယ်စားပြုစနစ်,Mranʻ mā nuiṅʻ ṅaṃ tvaṅʻ proṅʻʺ lai kyaṅʻʹ suṃʺ maññʻʹ ’a khyuiʺ kya kuiyʻ cāʺ pru ca nacʻ -burmese,ကော်မတီ,koʻmatī -burmese,ဘောလုံး,bholuṃʺ -burmese,ဧရာဝတီမြစ်,Erāvatī mracʻ +burmese,က ခ ဂ ဃ င စ ဆ ဇ ဈ ည ဉ ̣ဋ ဌ ဍ ဎ ဏ တ ထ ဒ ဓ န ပ ဖ ဗ ဘ မ ယ ရ လ ဝ သ ဟ ဠ အ,ka kha ga gha ṅa ca cha ja jha ñña ña ̣ṭa ṭha ḍa ḍha ṇa ta tha da dha na pa pha ba bha ma ya ra la va sa ha ḷa ʼa +burmese,က ကာ ကိ ကီ ကု ကူ ကေ ကဲ ကော ကော’ ကို ကျ် ကြ် ကွ် ကဟ် အ အာ ဣ ဤ ဥ ဦ ဧ ဩ ဩ’,ka kā ki kī ku kū ke kai ko koʼ kui ky kr kv kah a ā i ī u ū e o oʼ +burmese,မျှ ကြွ လွှ မြွှင်,myha krva lvha mrvhaṅʻ +burmese,တက္ကသိုလ် တိရစ္ဆာန် ကမ္ဘာ,takkasuilʻ tiracchānʻ kambhā +burmese,၀ ၁ ၂ ၃ ၄ ၅ ၆ ၇ ၈ ၉,0 1 2 3 4 5 6 7 8 9 +burmese,ကော်မီတီ ဥပုသ် ပန်းကန့် ကော်ပြန့် မောင် စိုး တင့် စော စိုန် မောင် ချန် ရီ စိန် ကဲနက် ဘ စိန် မြ သီတာ ပဒေသရာဇာ,koʻmītī upusʻ panʻʺkanʹ koʻpranʻʹ Moṅʻ Cuiʺ Taṅʻʹ Co Cuinʻ Moṅʻ Khyanʻ Rī Cinʻ Kainakʻ Bha Cinʻ Mra Sītā Padesarājā +burmese,မဟာ သမိုင်းတော် ကြီး ညွန့် ပေါင်း ယော အ တွင်း ဝန့် ဦး ဖိုး လှိုင် ဒုဋ္ဌဂါမဏိ မင်း ကြီး ဝတ္ထု,Mahā samuiṅʻʺtoʻ krīʺ ññvanʻʹ poṅʻʺ Yo ʼA tvaṅʻʺ vanʹ Ūʺ Phuiʺ Lhuiṅʻ Duṭṭhagāmaṇi maṅʻʺ krīʺ vatthu diff --git a/test/data/script_samples/chechen_cyrillic.csv b/test/data/script_samples/chechen_cyrillic.csv new file mode 100644 index 0000000..cea0ef7 --- /dev/null +++ b/test/data/script_samples/chechen_cyrillic.csv @@ -0,0 +1,2 @@ +chechen_cyrillic, А а Б б Ч Ч ч Д д Э э Ё ё Ѧ ѧ Е е Ѳ ѳ Ф ф Г г Ӏ ӏ Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ӏ ӏ, 
A a B b CH Ch ch D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ḣ ḣ +chechen_cyrillic,Ӏ ӏ,Ḣ ḣ diff --git a/test/data/script_samples/chukchi_cyrillic.csv b/test/data/script_samples/chukchi_cyrillic.csv new file mode 100644 index 0000000..aa9c745 --- /dev/null +++ b/test/data/script_samples/chukchi_cyrillic.csv @@ -0,0 +1,2 @@ +chukchi_cyrillic,А а Б б Ч Ч ч Д д Э э Ё ё Е е Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Қ қ Ң Ң Ң Ң ң ң, A a B b CH Ch ch D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Q q N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g +chukchi_cyrillic,Қ қ Ң Ң Ң Ң ң ң,Q q N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g diff --git a/test/data/script_samples/church_slavonic.csv b/test/data/script_samples/church_slavonic.csv new file mode 100644 index 0000000..500d84e --- /dev/null +++ b/test/data/script_samples/church_slavonic.csv @@ -0,0 +1 @@ +church_slavonic,А а Б б В в Г г Д д Є є Е е Ѥ ѥ Ж ж Ѕ ѕ З з И и Й й І і К к Л л М м Н н О о П п Р р С с Т т Ѹ ѹ Ꙋ ꙋ Ф ф Х х Ѿ ѿ Ѡ ѡ Ц ц Ч ч Ш ш Щ щ Ъ ъ Ы ы Ꙑ ꙑ Ь ь Ѣ ѣ Ю ю Ꙗ ꙗ Ѧ ѧ Ѯ ѯ Ѱ ѱ Ѳ ѳ Ѷ ѷ Ѫ ѫ Ѩ ѩ Ѭ ѭ Ћ ћ,A a B b V v G g D d E e Ē ē I͡E i͡e Zh zh Ż ż Z z I i Ĭ ĭ Ī ī K k L l M m N n O o P p R r S s T t U u Ū ū F f Kh kh Ō͡T ō͡t Ō ō T͡S t͡s Ch ch Sh sh Sht sht ʺ̳ ʺ Ȳ ȳ Y y ʹ̳ ʹ Ě ě I͡U i͡u I͡A i͡a Ę ę K͡S k͡s P͡S p͡s Ḟ ḟ Ẏ ẏ Ǫ ǫ I͡Ę i͡ę I͡Ǫ i͡ǫ Ǵ ǵ diff --git a/test/data/script_samples/chuvash_cyrillic.csv b/test/data/script_samples/chuvash_cyrillic.csv new file mode 100644 index 0000000..74f2aec --- /dev/null +++ b/test/data/script_samples/chuvash_cyrillic.csv @@ -0,0 +1,2 @@ +chuvash_cyrillic,А а Б б Д д Э э Ё ё Е е Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м 
Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ӑ ӑ Ӗ ӗ Ԡ ԡ Ԣ ԣ Ҫ ҫ Ћ Ћ ћ Ӱ ӱ, A a B b D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ă ă Ĕ ĕ Ĺ ĺ Ń ń Ś ś CH Ch ch U̇ u̇ +chuvash_cyrillic,Ӑ ӑ Ӗ ӗ Ԡ ԡ Ԣ ԣ Ҫ ҫ Ћ Ћ ћ Ӱ ӱ,Ă ă Ĕ ĕ Ĺ ĺ Ń ń Ś ś CH Ch ch U̇ u̇ diff --git a/test/data/script_samples/coptic.csv b/test/data/script_samples/coptic.csv new file mode 100644 index 0000000..f6cf526 --- /dev/null +++ b/test/data/script_samples/coptic.csv @@ -0,0 +1,2 @@ +coptic,Ⲁ ⲁ Ⲃ ⲃ Ⲅ ⲅ Ⲇ ⲇ Ⲉ ⲉ Ⲍ ⲍ Ⲏ ⲏ Ⲑ Ⲑ ⲑ Ⲓ ⲓ Ϊ ϊ Ⲕ ⲕ L ⲗ Ⲙ ⲙ Ⲛ ⲛ X ⲝ Ⲟ ⲟ Ⲡ ⲡ Ⲣ ⲣ Ⲥ ⲥ Ⲧ ⲧ Ⲩ ⲩ Ϋ ϋ Ⲫ Ⲫ ⲫ Ⲭ Ⲭ ⲭ Ⲯ Ⲯ ⲯ Ⲱ ⲱ Ϣ ϣ Ϥ ϥ Ϩ ϩ ⳪ Ⳬ Ⳓ Ⳓ ⳓ Ϯ Ϯ ϯ Ẍ ⳉ Ϧ ϧ Ⲳ ⲳ Ⲹ ⲹ Ⲻ ⲻ Ⳃ Ⳃ ⳃ ⳃ Ⳋ ⳋ Ⳟ ⳟ Ⳟ ⳟ Ⳡ ⳡ Ⳡ ⳡ Ⳣ ⳣ,A a B b G g D d E e Z z Ē ē TH Th th I i Ï ï K k L l M m N n X x O o P p R r S s T t U u Ü ü PH Ph ph CH Ch ch PS Ps ps Ō ō Š š F f H h Č č KY Ky ky TI Ti ti Ẍ ẍ Ḥ ḥ '̳ ' K̤ k̤ N̈ n̈ Ç̈ Ç̈ ç̈ ç̈ Ç ç Ŋ ŋ N̳ n̳ Ɲ ɲ N̲ n̲ W w +coptic,ⲁ̅ ⲃ̅ ⲅ̅ ⲇ̅ ⲉ̅ ⲋ̅ ⲍ̅ ⲏ̅ ⲑ̅ ⲓ̅ ⲕ̅ ⲗ̅ ⲙ̅ ⲛ̅ ⲝ̅ ⲟ̅ ⲡ̅ ϥ̅ ⲣ̅ ⲥ̅ ⲧ̅ ⲩ̅ ⲫ̅ ⲭ̅ ⲯ̅ ⲱ̅ ⳁ̅ ⲁ̿ ⲃ̿ ⲅ̿ ⲇ̿ ⲉ̿ ⲋ̿ ⲍ̿ ⲏ̿ ⲑ̿,1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100 200 300 400 500 600 700 800 900 1000 2000 3000 4000 5000 6000 7000 8000 9000 diff --git a/test/data/script_samples/cyrillic_generic.csv b/test/data/script_samples/cyrillic_generic.csv new file mode 100644 index 0000000..4b3b666 --- /dev/null +++ b/test/data/script_samples/cyrillic_generic.csv @@ -0,0 +1,8 @@ +cyrillic_generic,Ӕ ӕ Ӑ ӑ Ә ә Ӓ ӓ Ӛ ӛ Ԙ ԙ Ӕ ӕ А а Б б Ҿ ҿ Ҽ ҽ Ћ ћ Ӵ ӵ Ӌ ӌ Ч ч Ц̳и ц̳и Ц̳ ц̳ Ԃ ԃ Ԁ ԁ,A͡E a͡e Ặ ặ Ă ă Ä ä A̋ a̋ A̐ a̐ Æ æ A a B b Ćh́ ćh́ Ćh ćh Ć ć C̈h c̈h C̣h c̣h Ch ch Ci ci C c D́ d́ Ḋ ḋ +cyrillic_generic, Ԭ ԭ Ԫ ԫ Ӡ ӡ Џ џ Ди ди Д д Ѐ ѐ Є є Ҍ ҍ Э э Ӭ ӭ Ё ё Е̦ е̦ Е е Ѳ ѳ Ф ф Ӻ ӻ Ғ ғ Ѓ ѓ Ґ ґ Ӷ ӷ Ҕ ҕ Г г ,D͡Ch d͡ch D͡Zh d͡zh D͡Z d͡z DŽ dž Di di D d È è Ē ē Ẹ̆ ẹ̆ Ė ė Ẹ̈ ẹ̈ Ë ë E̦ e̦ E e Ḟ ḟ F f Gḩ gḩ Gh gh Ǵ ǵ Ğ ğ G̣ g̣ Ģ ģ G g +cyrillic_generic,Ӿ ӿ Ӽ ӽ 
Һ һ Ӏ ӏ Г г Ѝ ѝ Ӣ ӣ І і Ҋ ҋ Й й Ӥ ӥ Ї ї Ј ј Я я Ѥ ѥ Ѩ ѩ Ѣ ѣ Ѭ ѭ Ю ю И и Ҹ ҹ Ӂ ӂ Ҷ ҷ,H̄ h̄ Ḩ ḩ Ḣ ḣ Ḧ ḧ H h Ì ì Ị̄ ị̄ Ī ī Ị̆ ị̆ Ĭ ĭ Ị̈ ị̈ Ï ï I̐ i̐ I͡A i͡a I͡Ē i͡ē I͡Ę i͡ę I͡E i͡e I͡Ǫ i͡ǫ I͡U i͡u I i J̄ j̄ J̆ j̆ Ĵ ĵ +cyrillic_generic, Ӝ ӝ Җ җ Ԟ ԟ Ќ ќ Ҳ ҳ Х х Ѯ ѯ К к Љ љ Ԕ ԕ Ĺи ԉи L̦и ӆи Ḷи ԯи Ļи ԓи L̤и ԡи Ли ли L л Ӎ ӎ М м, J̈ j̈ J j K̀ k̀ Ḱ ḱ K͡H k͡h Kh kh K͡S k͡s K k LJ lj Lkḣ lkḣ Ĺi ĺi L̦i l̦i Ḷi ḷi Ļi ļi L̤i l̤i Li li L l Ṃ ṃ M m +cyrillic_generic, Њ њ Ҥ ҥ Ԋ ԋ Ԩ ԩ Ԣ ԣ Ӊ ӊ Ӈ ӈ Ң ң № Н н Ѽ ѽ Ѻ ѻ Ѿ ѿ Ѡ ѡ Ӫ ӫ Ө ө Ӧ ӧ Ѹ ѹ О о,NJ nj ŃG̀ ńg̀ Ń ń N̦ n̦ Ņ ņ Ṇ͡G ṇ͡g Ņ͡G ņ͡g N͡G n͡g No̲ N n Õ õ Ọ̄ ọ̄ Ō͡T ō͡t Ō̤ ō̤ Ō ō Ȯ ȯ Ö ö O͡U o͡u O o +cyrillic_generic, Ҧ ҧ Ԥ ԥ Ѱ ѱ П п Ҡ ҡ Ҁ ҁ Ҟ ҟ Ӄ ӄ Ԛ ԛ Қ қ Ԗ ԗ Ҏ ҏ Р р Щ щ Ԧ ԧ Ш ш Ԍ ԍ Ѕ ѕ С с Ҫ ҫ, Ph ph P̣ p̣ P͡S p͡s P p Q̀ q̀ Q̂ q̂ Q̄ q̄ Q̇ q̇ Q̈ q̈ Q q Rkḣ rkḣ R̆ r̆ R r Shch shch Sḥ sḥ Sh sh Ś ś Ṡ ṡ S s Th th +cyrillic_generic, Ԏ ԏ Ҭ ҭ Ҙ ҙ Ҵ ҵ Ц ц Т т Ұ ұ Ӯ ӯ Ў ў Ү ү Ӱ ӱ Ӳ ӳ У у V́и в́и V̈и ѷи Ви Ви ви V в Ԝ ԝ Ҩ ҩ, T́ t́ Ţ ţ T͡H t͡h T͡Ṡ t͡ṡ T͡S t͡s T t Û û Ū ū Ŭ ŭ U̇ u̇ Ü ü Ű ű U u V́i v́i V̈i v̈i Vi Vi vi V v Ẅ ẅ W w +cyrillic_generic, Х̳о X х̳ Ѵ ѵ Ӹ ӹ Ы ы Ж ж Ԅ ԅ Ԑ ԑ Ԇ ԇ З з Ђ ђ Ь ь Ъ ъ « »,Xo X x Ẏ ẏ Ÿ ÿ Y y Zh zh Ź ź Ż ż Z̧ z̧ Z z Đ đ ʹ̳ ʹ ʺ̳ ʺ << >> diff --git a/test/data/script_samples/dargwa_cyrillic.csv b/test/data/script_samples/dargwa_cyrillic.csv new file mode 100644 index 0000000..a211475 --- /dev/null +++ b/test/data/script_samples/dargwa_cyrillic.csv @@ -0,0 +1,2 @@ +dargwa_cyrillic,А а Б б Д д Э э Ё ё Е е Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ы ы Ь ь Ъ ъ Ж Ж ж З з Ӏ ӏ,A a B b D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ḣ ḣ +dargwa_cyrillic,Ӏ ӏ,Ḣ ḣ diff --git a/test/data/script_samples/devanagari_generic.csv b/test/data/script_samples/devanagari_generic.csv new file mode 100644 index 0000000..135a878 --- 
/dev/null +++ b/test/data/script_samples/devanagari_generic.csv @@ -0,0 +1,8 @@ +devanagari_generic,क क़ ख ख़ ग ग़ घ घ़ ॻ ङ च छ ज ज़ झ ञ ॹ ॼ ट ट़ ठ ड ड़ ढ ढ़ ॾ त थ द ध न ऩ प फ फ़ ब भ म य र ल व श ष स स़ ह ह़ नं नँ नः ऽ,ka qa kha k̲h̲a ga g̲h̲a gha g̳h̳a ġa ṅa ca cha ja za jha ña z̤a j̈a ṭa t̤a ṭha ḍa ṛa ḍha ṛha d̤a ta tha da dha na n̤a pa pha fa ba bha ma ya ra la va śa sha sa s̤a ha h̤a naṃ nam̐ naḥ ' +devanagari_generic,अ ॲ आ ऄ ऐ ꣾ औ ॵ ए ऎ ऍ इ ई ओ ऒ ऑ ॳ ॴ उ ऊ ॶ ॷ ऌ ॡ ऋ ॠ,a â ā ă ai ăi au ău e ĕ ê i ī o ŏ ô ȯ ö u ū u̇ ü ḷ ḹ ṛ ṝ +devanagari_generic,क का कै कौ कॏ के कॆ कॅ कि की को कॊ कॉ कऺ कऻ कु कू कॖ कॗ कॢ कॣ कृ कॄ,ka kā kai kau kău ke kĕ kê ki kī ko kŏ kô kȯ kö ku kū ku̇ kü kl̥ kl̥̄ kr̥ kr̥̄ +devanagari_generic,अंक अंक़ अंख अंख़ अंग अंग़ अंघ अंघ़ अंङ अंच अंछ अंज अंज़ अंझ अंञ अंट अंट़ अंठ अंड अंड़ अंढ अंढ़ अंण अंत अंथ अंद अंध अंन अंप अंफ अंफ़ अंब अंभ अंम अंय अंर अंल अंव अंश अंष अंस अंस़ अंह अंह़,aṅka aṅqa aṅkha aṅk̲h̲a aṅga aṅg̲h̲a aṅgha aṅg̳h̳a aṅṅa añca añcha añja añza añjha añña aṇṭa aṇt̤a aṇṭha aṇḍa aṇṛa aṇḍha aṇṛha aṇṇa anta antha anda andha anna ampa ampha amfa amba ambha amma aṃya aṃra aṃla aṃva aṃśa aṃsha aṃsa aṃs̤a aṃha aṃh̤a +devanagari_generic,अँक अँक़ अँख अँख़ अँग अँग़ अँघ अँघ़ अँङ अँच अँछ अँज अँज़ अँझ अँञ अँट अँट़ अँठ अँड अँड़ अँढ अँढ़ अँण अँत अँथ अँद अँध अँन अँप अँफ अँफ़ अँब अँभ अँम अँय अँर अँल अँव अँश अँष अँस अँस़ अँह अँह़ अँ,an̐ka an̐qa an̐kha an̐k̲h̲a an̐ga an̐g̲h̲a an̐gha an̐g̳h̳a an̐ṅa an̐ca an̐cha an̐ja an̐za an̐jha an̐ña an̐ṭa an̐t̤a an̐ṭha an̐ḍa an̐ṛa an̐ḍha an̐ṛha an̐ṇa an̐ta an̐tha an̐da an̐dha an̐na am̐pa am̐pha am̐fa am̐ba am̐bha am̐ma am̐ya am̐ra am̐la am̐va am̐śa am̐sha am̐sa am̐s̤a am̐ha am̐h̤a am̐ +devanagari_generic,० १ २ ३ ४ ५ ६ ७ ८ ९ । ॥ ऽ,0 1 2 3 4 5 6 7 8 9 / // ' +devanagari_generic,at head of title,at head of title +devanagari_generic,MMXXVI MCMXCIX,MMXXVI MCMXCIX diff --git a/test/data/script_samples/divehi_thaana.csv b/test/data/script_samples/divehi_thaana.csv new file mode 100644 index 0000000..e711d42 --- /dev/null +++ 
b/test/data/script_samples/divehi_thaana.csv @@ -0,0 +1,3 @@ +divehi_thaana,ހް ށް ން ރް ބް ޅް ކް އް ވް މް ފް ދް ތް ލް ގް ޏް ސް ޑް ޖް ޗް ޒް ޓް ޕް ޔް ޘް ޙް ޚް ޛް ޝް ޞް ޡް ޠް ޡް ޢް ޣް ޤް އަ އާ އި އީ އު އޫ އެ އޭ އޮ އޯ,h ś n r b ḷ k ʼ v m f d t l g ñ s ḍ j c z ṭ p y th ḥ kh dh sh ṣ d̲ t̤ ẓ ʻ gh q a ā i ī u ū e ē o ō +divehi_thaana,އަށްވަނަ މަށަށް އަނގަ ހަނދު އަތަ އިދު އުމުރު އެގަހުގި ހައިހޫނަ ފައިސަ ކްއީން ޗައްޕަލު އައްޕައްޗި ބޮއް ބިހެއް އަތްތެރި,aḫvana maśaḫ aṁga haṁdu ata idu umuru egahugi haʼihūna faʼisa kʼīn cappalu appacci boh̲ biheh̲ at̲teri +divehi_thaana,"$a ދިވެހިރާއްޖޭގެ ހިރިގަލު މިސްކިތްތަހް : $b ބް އިންޑިާ ކަންދުން ގެއްލެމުންދާ ތަރިކަ : މަޢުރަޒު ލުޢިފޮތް / $c ކުރަހާ އަފި އެދިޓުކުރީ މަޢުރޫފު ޖަމީލް ; ތަރުޖަމާކުރީ, މުހަނމަދު ވަހީދު (މަޑުލު).","$a Divehirājjēge hirigalu miskit̲tah : $b Inḍiā Kandun gellemundā tarika : maʻurazu luʻifot / $c kurahā afi ediṭukurī Maʻurūfu Jamīl ; tarujamākurī, Muhaṁmadu Vahīdu (Maḍulu)." diff --git a/test/data/script_samples/dogri_devanagari.csv b/test/data/script_samples/dogri_devanagari.csv new file mode 100644 index 0000000..b68cf60 --- /dev/null +++ b/test/data/script_samples/dogri_devanagari.csv @@ -0,0 +1,7 @@ +dogri_devanagari,क क़ ख ख़ ग ग़ घ घ़ ॻ ङ च छ ज ज़ झ ञ ॹ ॼ ट ट़ ठ ड ड़ ढ ढ़ ॾ त थ द ध न ऩ प फ फ़ ब भ म य र ल व श ष स स़ ह ह़ नं नँ नः ऽ,ka qa kha k̲h̲a ga g̲h̲a gha g̳h̳a ġa ṅa ca cha ja za jha ña z̤a j̈a ṭa t̤a ṭha ḍa ṛa ḍha ṛha d̤a ta tha da dha na n̤a pa pha fa ba bha ma ya ra la va śa sha sa s̤a ha h̤a naṃ nam̐ naḥ ' +dogri_devanagari,अ ॲ आ ऄ ऐ ꣾ औ ॵ ए ऎ ऍ इ ई ओ ऒ ऑ ॳ ॴ उ ऊ ॶ ॷ ऌ ॡ ऋ ॠ,a â ā ă ai ăi au ău e ĕ ê i ī o ŏ ô ȯ ö u ū u̇ ü ḷ ḹ ṛ ṝ +dogri_devanagari,क का कै कौ कॏ के कॆ कॅ कि की को कॊ कॉ कऺ कऻ कु कू कॖ कॗ कॢ कॣ कृ कॄ,ka kā kai kau kău ke kĕ kê ki kī ko kŏ kô kȯ kö ku kū ku̇ kü kl̥ kl̥̄ kr̥ kr̥̄ +dogri_devanagari,अंक अंक़ अंख अंख़ अंग अंग़ अंघ अंघ़ अंङ अंच अंछ अंज अंज़ अंझ अंञ अंट अंट़ अंठ अंड अंड़ अंढ अंढ़ अंण अंत अंथ अंद अंध अंन अंप अंफ अंफ़ अंब अंभ अंम अंय अंर अंल अंव अंश अंष अंस अंस़ अंह अंह़,aṅka aṅqa 
aṅkha aṅk̲h̲a aṅga aṅg̲h̲a aṅgha aṅg̳h̳a aṅṅa añca añcha añja añza añjha añña aṇṭa aṇt̤a aṇṭha aṇḍa aṇṛa aṇḍha aṇṛha aṇṇa anta antha anda andha anna ampa ampha amfa amba ambha amma aṃya aṃra aṃla aṃva aṃśa aṃsha aṃsa aṃs̤a aṃha aṃh̤a +dogri_devanagari,अँक अँक़ अँख अँख़ अँग अँग़ अँघ अँघ़ अँङ अँच अँछ अँज अँज़ अँझ अँञ अँट अँट़ अँठ अँड अँड़ अँढ अँढ़ अँण अँत अँथ अँद अँध अँन अँप अँफ अँफ़ अँब अँभ अँम अँय अँर अँल अँव अँश अँष अँस अँस़ अँह अँह़ अँ,an̐ka an̐qa an̐kha an̐k̲h̲a an̐ga an̐g̲h̲a an̐gha an̐g̳h̳a an̐ṅa an̐ca an̐cha an̐ja an̐za an̐jha an̐ña an̐ṭa an̐t̤a an̐ṭha an̐ḍa an̐ṛa an̐ḍha an̐ṛha an̐ṇa an̐ta an̐tha an̐da an̐dha an̐na am̐pa am̐pha am̐fa am̐ba am̐bha am̐ma am̐ya am̐ra am̐la am̐va am̐śa am̐sha am̐sa am̐s̤a am̐ha am̐h̤a am̐ +dogri_devanagari,० १ २ ३ ४ ५ ६ ७ ८ ९ । ॥ ऽ,0 1 2 3 4 5 6 7 8 9 / // ' +dogri_devanagari,at head of title,at head of title diff --git a/test/data/script_samples/dungan_cyrillic.csv b/test/data/script_samples/dungan_cyrillic.csv new file mode 100644 index 0000000..fa2b362 --- /dev/null +++ b/test/data/script_samples/dungan_cyrillic.csv @@ -0,0 +1,2 @@ +dungan_cyrillic,А а Б б Д д Э э Ё ё Е е Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ы ы Ь ь Ъ ъ Ж Ж ж З з Ә ә Җ Җ Ң Ң Ң Ң ң ң Ў ў Ү ү,A a B b D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z Ă ă J j N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g Ŭ ŭ U̇ u̇ +dungan_cyrillic,Ә ә Җ Җ Ң Ң Ң Ң ң ң Ў ў Ү ү, Ă ă J j N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g Ŭ ŭ U̇ u̇ diff --git a/test/data/script_samples/dzongkha_tibetan.csv b/test/data/script_samples/dzongkha_tibetan.csv new file mode 100644 index 0000000..d897c61 --- /dev/null +++ b/test/data/script_samples/dzongkha_tibetan.csv @@ -0,0 +1,7 @@ 
+dzongkha_tibetan,ཀ༌ཁ༌ག༌ང༌ཆ༌ཇ༌ཉ༌ཏ༌ཐ༌ད༌ན༌པ༌ཕ༌བ༌མ༌ཙ༌ཚ༌ཛ༌ཝ༌ཞ༌ཟ༌འ༌ཡ༌ར༌ལ༌ཤ༌ས༌ཧ༌ཊ༌ཋ༌ཌ༌ཎ༌ཥ༌གྷ༌དྷ༌བྷ༌ཛྷ༌ཌྷ༌ཀཾ༌ཀྃ༌྅༌ཨ༌ཨི༌ཱི༌ཨུ༌ཨཱུ༌ཨེ༌ཨོ༌ཨཱ༌ཨཻ༌ཨཽ༌ལླྀ༌ལླཱྀ༌རྲྀ༌རྲཱྀ,ka kha ga nga cha ja nya ta tha da na pa pha ba ma tsa tsha dza wa zha za ʼa ya ra la sha sa ha ṭa ṭha ḍa ṇa ṣa gha dha bha dzha ḍha kaṃ kam̐ ` a i ī u ū e o ā ai au l̥ l̥̄ r̥ r̥̄ +dzongkha_tibetan,གསོ༌༌གནུབས༌གནོན༌དགོན༌དཔེ༌དངོས༌བསྟེན༌བསྒྲིགས༌བརྒྱུད༌མཇུག༌མདུན༌མགྲོན༌འཕགས༌འཕྲོད༌འཛིན༌འགྱུར༌འགྲོས༌འབྱོར༌འཇིགས,gso gnubs gnon dgon dpe dngos bsten bsgrigs brgyud mjug mdun mgron ʼphags ʼphrod ʼdzin ʼgyur ʼgros ʼbyor ʼjigs +dzongkha_tibetan,བོད༌ཀྱི༌ཆོས༌འབྱུང༌དང༌རྒྱལ༌རབས༌ཀྱི༌སྐོར༌:༌སྦ༌བཞེད༌དེབ༌ཐེར༌དམར༌པོ༌:༌དེབ༌ཐེར༌དཀར༌པོ༌:༌གངས་ཅན༌བོད༌ཀྱི༌རྒྱལ༌བསྟན༌ཕྱི༌མོ,Bod kyi chos ʾbyung dang rgyal rabs kyi skor : Sba bzhed Deb ther dmar po : Deb ther dkar po : Gangs-can Bod kyi rgyal bstan phyi mo +dzongkha_tibetan,"སྔ༌འགྱུར༌རྙིང༌མའི༌བྱུང༌བ༌མདོ༌ཙམ༌བརྗོད༌པ༌པདྨ༌དམ༌རྭ༌གའི༌དོ༌ཤལ༌གཞོན༌ནུ༌དགྱེས༌པའི༌མགུལ༌རྒྱན༌/༌སྦ་གསལ་སྣང,","snga ʾgyur rnying maʾi byung ba mdo tsam brjod pa Padma dma rwa gaʾi do shal gzhon nu dgyes paʾi mgul rgyan / Sba-gsal-snang, " +dzongkha_tibetan,"ཚལ་པ༌ཀུན་དགའ་རྡོ་རྗེ,༌དགེ་འདུན་ཆོས་འཕེལ,༌ཁམས་སྤྲུལ༌བསོད་ནམས་དོན་གྲུབ༌/","Tshal-pa Kun-dgaʾ-rdo-rje, Dge-ʾdun-chos-ʾphel, Khams-sprul Bsod-nams-don-grub / " +dzongkha_tibetan,པའི༌བེའུ༌མིའི༌དགེའི༌ཤུའི༌མོའི༌ཐུའུ༌གསོའི༌པའོ,paʼi beʼu miʼi dgeʼi shuʼi moʼi Thuʼu gsoʼi paʼo +dzongkha_tibetan,འོས༌འོད༌མོའི༌སྐུའི༌བེའི༌འབུམ༌མིའི༌དགེའི༌ཐུའུ༌ཀན༌ཤཱ་ཀྱའི༌ལི༌ཁྲིའི༌ལས༌གཞིའི༌མཐོང༌བའི,ʼos ʼod moʼi skuʼi beʼi ʼbum miʼi dgeʼi thuʼu kan shā-kyaʼi li khriʼi las gzhiʼi mthong baʼi diff --git a/test/data/script_samples/eskimo_yuit_cyrillic.csv b/test/data/script_samples/eskimo_yuit_cyrillic.csv new file mode 100644 index 0000000..9f76318 --- /dev/null +++ b/test/data/script_samples/eskimo_yuit_cyrillic.csv @@ -0,0 +1,2 @@ +eskimo_yuit_cyrillic,А а Б б Д д Э э Ё ё Е е Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ы ы Ь ь Ъ ъ Ж Ж ж З з Г' Г' г' 
К' к' Л' л' Н' Н' Н' Н' н' н' Х' х' Ў ў,A a B b D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z GH Gh gh Q q Ĺ ĺ N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g Ḣ ḣ W w +eskimo_yuit_cyrillic,Г' Г' г' К' к' Л' л' Н' Н' Н' Н' н' н' Х' х' Ў ў,GH Gh gh Q q Ĺ ĺ N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g Ḣ ḣ W w diff --git a/test/data/script_samples/ethiopic_generic.csv b/test/data/script_samples/ethiopic_generic.csv new file mode 100644 index 0000000..d8f1047 --- /dev/null +++ b/test/data/script_samples/ethiopic_generic.csv @@ -0,0 +1,41 @@ +ethiopic_generic,ሀ ሀ ሁ ሁ ሂ ሂ ሃ ሃ ሄ ሄ ህ ህ ሆ ሆ ህ‍ ከህ‍,Ha ha Hu hu Hi hi Hā hā Hé hé He he Ho ho H kah +ethiopic_generic,ለ ለ ሉ ሉ ሊ ሊ ላ ላ ሌ ሌ ል ል ሎ ሎ ል‍ ከል‍ /,La la Lu lu Li li Lā lā Lé lé Le le Lo lo L kal / +ethiopic_generic,ሐ ሐ ሑ ሑ ሒ ሒ ሓ ሓ ሔ ሔ ሕ ሕ ሖ ሖ ሕ‍ ከሕ‍ / ,Ḥa ḥa Ḥu ḥu Ḥi ḥi Ḥā ḥā Ḥé ḥé Ḥe ḥe Ḥo ḥo Ḥ kaḥ / +ethiopic_generic,መ መ ሙ ሙ ሚ ሚ ማ ማ ሜ ሜ ም ም ሞ ሞ ም‍ ከም‍ / ,Ma ma Mu mu Mi mi Mā mā Mé mé Me me Mo mo M kam / +ethiopic_generic,ሠ ሠ ሡ ሡ ሢ ሢ ሣ ሣ ሤ ሤ ሥ ሥ ሦ ሦ ሥ‍ ከሥ‍ / ,Śa śa Śu śu Śi śi Śā śā Śé śé Śe śe Śo śo Ś kaś / +ethiopic_generic,ረ ረ ሩ ሩ ሪ ሪ ራ ራ ሬ ሬ ር ር ሮ ሮ ር‍ ከር‍ / ,Ra ra Ru ru Ri Ri Rā rā Ré ré Re re Ro ro R kar / +ethiopic_generic,ሰ ሰ ሱ ሱ ሲ ሲ ሳ ሳ ሴ ሴ ስ ስ ሶ ሶ ስ‍ ከስ‍ / ,Sa sa Su su Si si Sā sā Sé sé Se se So so S kas / +ethiopic_generic,ሸ ሸ ሹ ሹ ሺ ሺ ሻ ሻ ሼ ሼ ሽ ሽ ሾ ሾ ሽ‍ ከሽ‍ /,Ša ša Šu šu Ši ši Šā šā Šé šé Še še Šo šo Š kaš / +ethiopic_generic,ቀ ቀ ቁ ቁ ቂ ቂ ቃ ቃ ቄ ቄ ቅ ቅ ቆ ቆ ቅ‍ ከቅ‍ / ,Qa qa Qu qu Qi qi Qā qā Qé qé Qe qe Qo qo Q kaq / +ethiopic_generic,በ በ ቡ ቡ ቢ ቢ ባ ባ ቤ ቤ ብ ብ ቦ ቦ ብ‍ ከብ‍ / ,Ba ba Bu bu Bi bi Bā bā Bé bé Be be Bo bo B kab / +ethiopic_generic,ተ ተ ቱ ቱ ቲ ቲ ታ ታ ቴ ቴ ት ት ቶ ቶ ት‍ ከት‍ / ,Ta ta Tu tu Ti ti Tā tā Té té Te te To to T kat / +ethiopic_generic,ቸ ቸ ቹ ቹ ቺ ቺ ቻ ቻ ቼ ቼ ች ች ቾ ቾ ች‍ ች‍ / ,Ča ča Ču ču Či či Čā čā Čé čé Če če Čo čo Č č / +ethiopic_generic,ኀ ኀ ኁ ኁ ኂ ኂ ኃ ኃ ኄ ኄ ኅ ኅ ኆ ኆ ኅ‍ ኅ‍ / ,H̲a h̲a H̲u h̲u H̲i h̲i H̲ā h̲ā H̲é 
h̲é H̲e h̲e H̲o h̲o H̲ h̲ / +ethiopic_generic,ነ ነ ኑ ኑ ኒ ኒ ና ና ኔ ኔ ን ን ኖ ኖ ን‍ ን‍ / ,Na na Nu nu Ni ni Nā nā Né né Ne ne No no N n / +ethiopic_generic,ኘ ኘ ኙ ኙ ኚ ኚ ኛ ኛ ኜ ኜ ኝ ኝ ኞ ኞ ኝ‍ ኝ‍ / ,Ña ña Ñu ñu Ñi ñi Ñā ñā Ñé ñé Ñe ñe Ño ño Ñ ñ / +ethiopic_generic,አ አ ኡ ኡ ኢ ኢ ኣ ኣ ኤ ኤ እ እ ኦ ኦ / ,ʼA ʼa ʼU ʼu ʼI ʼi ʼĀ ʼā ʼÉ ʼé ʼE ʼe ʼO ʼo / +ethiopic_generic,ከ ከ ኩ ኩ ኪ ኪ ካ ካ ኬ ኬ ክ ክ ኮ ኮ ክ‍ ክ‍ / ,Ka ka Ku ku Ki ki Kā kā Ké ké Ke ke Ko ko K k / +ethiopic_generic,ኸ ኸ ኹ ኹ ኺ ኺ ኻ ኻ ኼ ኼ ኽ ኽ ኾ ኾ ኽ‍ ኽ‍ ኧ ኧ /,Xa xa Xu xu Xi xi Xā xā Xé xé Xe xe Xo xo X x / +ethiopic_generic,ዉ ዉ ዊ ዊ ዋ ዋ ዌ ዌ ው ው ዎ ዎ ው‍ ው‍ / ,Wa wa Wu wu Wi wi Wā wā Wé wé We we Wo wo W w / +ethiopic_generic,ዐ ዐ ዑ ዑ ዒ ዒ ዔ ዔ ዕ ዕ ዖ ዖ / ,ʻA ʻa ʻU ʻu ʻI ʻi ʻÉ ʻé ʻE ʻe ʻO ʻo / +ethiopic_generic,ዘ ዘ ዙ ዙ ዚ ዚ ዛ ዛ ዜ ዜ ዝ ዝ ዞ ዞ ዝ‍ ዝ‍ / ,Za za Zu zu Zi zi Zā zā Zé zé Ze ze Zo zo Z z / +ethiopic_generic,ዠ ዠ ዡ ዡ ዢ ዢ ዣ ዣ ዤ ዤ ዥ ዥ ዦ ዦ ዥ‍ ዥ‍ / ,Ža ža Žu žu Ži ži Žā žā Žé žé Že že Žo žo Ž ž / +ethiopic_generic,የ የ ዩ ዩ ዪ ዪ ያ ያ ዬ ዬ ይ ይ ዮ ዮ ይ‍ ይ‍ / ,Ya ya Yu yu Yi yi Yā yā Yé yé Ye ye Yo yo Y y / +ethiopic_generic,ደ ደ ዱ ዱ ዲ ዲ ዳ ዳ ዴ ዴ ድ ድ ዶ ዶ ድ‍ ድ‍ / ,Da da Du du Di di Dā dā Dé dé De de Do do D d / +ethiopic_generic,ጀ ጀ ጁ ጁ ጂ ጂ ጃ ጃ ጄ ጄ ጅ ጅ ጆ ጆ ጅ‍ ጅ‍ / ,Ǧa ǧa Ǧu ǧu Ǧi ǧi Ǧā ǧā Ǧé ǧé Ǧe ǧe Ǧo ǧo Ǧ ǧ / +ethiopic_generic,ገ ገ ጉ ጉ ጊ ጊ ጋ ጋ ጌ ጌ ግ ግ ጎ ጎ ግ‍ ግ‍ / ,Ga ga Gu gu Gi gi Gā gā Gé gé Ge ge Go go G g / +ethiopic_generic,ጠ ጠ ጡ ጡ ጢ ጢ ጣ ጣ ጤ ጤ ጥ ጥ ጦ ጦ ጥ‍ ጥ‍ / ,Ṭa ṭa Ṭu ṭu Ṭi ṭi Ṭā ṭā Ṭé ṭé Ṭe ṭe Ṭo ṭo Ṭ ṭ / +ethiopic_generic,ጨ ጨ ጩ ጩ ጪ ጪ ጫ ጫ ጬ ጬ ጭ ጭ ጮ ጮ ጭ‍ ጭ‍ / ,Ċa ċa Ċu ċu Ċi ċi Ċā ċā Ċé ċé Ċe ċe Ċo ċo Ċ ċ / +ethiopic_generic,ጰ ጰ ጱ ጱ ጲ ጲ ጳ ጳ ጴ ጴ ጵ ጵ ጶ ጶ ጵ‍ ጵ‍ / ,P̣a p̣a P̣u p̣u P̣i p̣i P̣ā p̣ā P̣é p̣é P̣e p̣e P̣o p̣o P̣ p̣ / +ethiopic_generic,ጸ ጸ ጹ ጹ ጺ ጺ ጻ ጻ ጼ ጼ ጽ ጽ ጾ ጾ ጽ‍ ጽ‍ / ,Ṣa ṣa Ṣu ṣu Ṣi ṣi Ṣā ṣā Ṣé ṣé Ṣe ṣe Ṣo ṣo Ṣ ṣ / +ethiopic_generic,ፀ ፀ ፁ ፁ ፂ ፂ ፃ ፃ ፄ ፄ ፅ ፅ ፆ ፆ ፅ‍ ፅ‍ / ,Ṡa ṡa Ṡu ṡu Ṡi ṡi Ṡā ṡā Ṡé ṡé Ṡe ṡe Ṡo ṡo Ṡ ṡ / +ethiopic_generic,ፈ ፈ ፉ ፉ ፊ ፊ ፋ ፋ ፌ ፌ ፍ ፍ ፎ ፎ ፍ‍ ፍ‍ / ,Fa fa Fu fu Fi fi Fā Fā Fé fé Fe fe Fo fo F f / +ethiopic_generic,ፐ ፐ ፑ ፑ ፒ ፒ 
ፓ ፓ ፔ ፔ ፕ ፕ ፖ ፖ ፕ‍ ፕ‍ / ,Pa pa Pu pu Pi pi Pā pā Pé pé Pe pe Po po P p / +ethiopic_generic,ቨ ቨ ቩ ቩ ቪ ቪ ቫ ቫ ቬ ቬ ቭ ቭ ቮ ቮ ቭ‍ ቭ‍ / ,Va va Vu vu Vi vi Vā vā Vé vé Ve ve Vo vo V v / +ethiopic_generic,ቈ ቈ ቊ ቊ ቋ ቋ ቌ ቌ ቍ ቍ / ,Qwa qwa Qwi qwi Qwā qwā Qwé qwé Qwe qwe / +ethiopic_generic,ኈ ኈ ኊ ኊ ኋ ኋ ኌ ኌ ኍ ኍ / ,H̲wa h̲wa H̲wi h̲wi H̲wā h̲wā H̲wé h̲wé H̲we h̲we / +ethiopic_generic,ኰ ኰ ኲ ኲ ኳ ኳ ኴ ኴ ኵ ኵ / ,Kwa kwa Kwi kwi Kwā kwā Kwé kwé Kwe kwe / +ethiopic_generic,ጐ ጐ ጒ ጒ (ጓ ጓ) ጔ ጔ ጕ ጕ /,Gwa gwa Gwi gwi (Gwā gwā) Gwé gwé Gwe gwe / +ethiopic_generic,ሏ ሏ ቧ ቧ ዟ ዟ (ጧ ጧ) ሟ ሟ ቷ ቷ ዧ ዧ (ጯ ጯ) ሯ ሯ ቿ ቿ ዯ ዯ /,Lwa lwa Bwa bwa Zwa zwa (Ṭwa ṭwa) Mwa mwa Twa twa Žwa žwa (Ċwa ċwa) Rwa rwa Čwa čwa Ywa ywa / +ethiopic_generic,(ጿ ጿ) ሷ ሷ ኗ ኗ ዷ ዷ (ፏ ፏ) ሿ ሿ ኟ ኟ ጇ ጇ ፘ ፘ ፙ ፙ ፚ ፚ / ,(Ṣwa ṣwa) Swa swa Nwa nwa Dwa dwa (Fwa fwa) Šwa šwa Ñwa ñwa Ǧwa ǧwa Rya rya Mya mya Fya fya / +ethiopic_generic,፩ ፪ ፫ ፬ ፭ ፮ ፯ ፰ ፱ ፲ ፳ ፴ ፵ ፶ ፷ ፸ ፹ ፺ ፻ ፼,1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100 10000 diff --git a/test/data/script_samples/even_evenki_cyrillic.csv b/test/data/script_samples/even_evenki_cyrillic.csv new file mode 100644 index 0000000..643488c --- /dev/null +++ b/test/data/script_samples/even_evenki_cyrillic.csv @@ -0,0 +1,2 @@ +even_evenki_cyrillic,А а Б б Ч Ч ч Д д Э э Ё ё Е е Ф ф Г г Я Я я Ѣ Ѣ ѣ Ю Ю ю І і Й й И и Х Х х К к Ла л М м Н н О о П п Р р Щ Щ щ Ш Ш ш С с Ц Ц ц Т т У у Ва в Ѵ ѵ Ы ы Ь ь Ъ ъ Ж Ж ж З з Ӈ Ӈ Ӈ Ӈ ӈ ӈ Ө ө Ӫ ӫ, A a B b CH Ch ch D d Ė ė Ë ë E e F f G g I͡A I͡a i͡a I͡E I͡e i͡e I͡U I͡u i͡u Ī ī Ĭ ĭ I i Kh Kh kh K k La l M m N n O o P p R r SHCH Shch shch SH Sh sh S s T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ZH Zh zh Z z N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g Ȯ ȯ Ō ō +even_evenki_cyrillic,Ӈ Ӈ Ӈ Ӈ ӈ ӈ Ө ө Ӫ ӫ,N︠G︡ N͡G N︠g︡ N͡g n︠g︡ n͡g Ȯ ȯ Ō ō diff --git a/test/data/script_samples/glagolitic.csv b/test/data/script_samples/glagolitic.csv new file mode 100644 index 0000000..3979663 --- /dev/null +++ b/test/data/script_samples/glagolitic.csv @@ -0,0 +1 @@ +glagolitic, Ⱝ ⱝ Ⰰ ⰰ Ⰱ ⰱ Ⱟ Ⱟ ⱟ Ⱍ Ⱍ ⱍ Ⰴ ⰴ Ⱕ ⱕ Ⱖ ⱖ Ⰵ ⰵ 
Ⱚ ⱚ Ⱇ ⱇ Ⰼ ⰼ Ⰳ ⰳ Ⱑ Ⱑ ⱑ Ⱗ ⱗ Ⱙ Ⱙ ⱙ Ⱓ Ⱓ ⱓ Ⰻ ⰻ Ⱒ Ⱒ ⱒ Ⱈ Ⱈ ⱈ Ⰽ ⰽ Ⰾⰰ ⰾ Ⱞ ⱞ Ⰿ ⰿ Ⱀ ⱀ Ⱉ ⱉ Ⱘ ⱘ Ⱁ ⱁ Ⱊ ⱊ Ⱂ ⱂ Ⱃ ⱃ Ⱋ Ⱋ ⱋ Ⱎ Ⱎ ⱎ Ⱄ Ⱄ Ⱌ Ⱌ ⱌ Ⱅ ⱅ Ⱆ ⱆ Ⰲⰰ ⰲ Ⱛ ⱛ Ⰺ ⰺ Ⱐ ⱐ Ⱏ ⱏ Ⱜ ⱜ Ⰷ ⰷ Ⰶ Ⰶ ⰶ Ⰸ ⰸ, Ȧ ȧ A a B b ĊH Ċh ċh CH Ch ch D d Ę̇ ę̇ Ě ě E e Ḟ ḟ F f Ǵ ǵ G g I͡A I͡a i͡a I͡Ę i͡ę I͡Ǫ I͡ǫ i͡ǫ I͡U I͡u i͡u Ï ï K̇H K̇h k̇h Kh Kh kh K k La l Ṁ ṁ M m N n Ō ō Ǫ ǫ O o Ṗ ṗ P p R r SHT SHt sht SH Sh sh S S T͡S T͡s t͡s T t U u Va v Ẏ ẏ Y y ʹ̳ ʹ ʺ̳ ʺ ʺ̳̇ ʺ̇ Ż ż ZH Zh zh Z z diff --git a/test/data/script_samples/ignore.csv b/test/data/script_samples/ignore.csv new file mode 100644 index 0000000..2ccd6dd --- /dev/null +++ b/test/data/script_samples/ignore.csv @@ -0,0 +1,27 @@ +russian,At head of title,At head of title +russian,at head of title,at head of title +russian,Colophon,Colophon +russian,colophon,colophon +russian,Cover title,Cover title +russian,cover title,cover title +russian,Spine title,Spine title +russian,spine title,spine title +russian,On cover,On cover +russian,on cover,on cover +russian,date of publication not identified,date of publication not identified +russian,place of publication not identified,place of publication not identified +russian,publisher not identified,publisher not identified +russian,and one other,and one other +russian,and others,and others +russian,et al.,et al. +russian,S.l.,S.l. +russian,s.n.,s.n. 
+russian,I C D L M N V X II CC CL MML XX XXX XIX XL MLX MXL MCM ,I C D L M N V X II CC CL MML XX XXX XIX XL MLX MXL MCM +russian,I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI XVII XVIII XIX XX XXI XXII XXIII XXIV XXV XXVI XXVII XXVIII XXIX XXX XXXI XXXII XXXIII XXXIV XXXV XXXVI XXXVII XXXVIII XXXIX,I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI XVII XVIII XIX XX XXI XXII XXIII XXIV XXV XXVI XXVII XXVIII XXIX XXX XXXI XXXII XXXIII XXXIV XXXV XXXVI XXXVII XXXVIII XXXIX +russian,XL XLI XLII XLIII XLIV XLV XLVI XLVII XLVIII XLIX L LI LII LIII LIV LV LVI LVII LVIII LIX LX LXI LXII LXIII LXIV LXV LXVI LXVII LXVIII LXIX ,XL XLI XLII XLIII XLIV XLV XLVI XLVII XLVIII XLIX L LI LII LIII LIV LV LVI LVII LVIII LIX LX LXI LXII LXIII LXIV LXV LXVI LXVII LXVIII LXIX +russian,LXX LXXI LXXII LXXIII LXXIV LXXV LXXVI LXXVII LXXVIII LXXIX LXXX LXXXI LXXXII LXXXIII LXXXIV LXXXV LXXXVI LXXXVII LXXXVIII LXXXIX,LXX LXXI LXXII LXXIII LXXIV LXXV LXXVI LXXVII LXXVIII LXXIX LXXX LXXXI LXXXII LXXXIII LXXXIV LXXXV LXXXVI LXXXVII LXXXVIII LXXXIX +russian,XC XCI XCII XCIII XCIV XCV XCVI XCVII XCVIII XCIX C CI CII CIII CIV CV CVI CVII CVIII CIX CX CXI CXII CXIII CXIV CXV CXVI CXVII CXVIII CXIX,XC XCI XCII XCIII XCIV XCV XCVI XCVII XCVIII XCIX C CI CII CIII CIV CV CVI CVII CVIII CIX CX CXI CXII CXIII CXIV CXV CXVI CXVII CXVIII CXIX +russian,CXX CXXI CXXII CXXIII CXXIII CXXIV CXXV CXXVI CXXVII CXXVIII CXXIX CXXX CXXXI CXXXII CXXXIII CXXXIV CXXXV CXXXVI CXXXVII CXXXVIII CXXXIX,CXX CXXI CXXII CXXIII CXXIII CXXIV CXXV CXXVI CXXVII CXXVIII CXXIX CXXX CXXXI CXXXII CXXXIII CXXXIV CXXXV CXXXVI CXXXVII CXXXVIII CXXXIX +russian,CCC CD D DC DCC DCCC CM MC MCC MCCC MCD MD MDC MDCC MDCCC MCM MCMXCIX MM,CCC CD D DC DCC DCCC CM MC MCC MCCC MCD MD MDC MDCC MDCCC MCM MCMXCIX MM +russian,MMI MMII MMIII MMIV MMV MMVI MMVII MMVIII MMIX MMX MMXI MMXII MMXIII MMXIV MMXV MMXVI MMXVII MMXVIII MMXIX,MMI MMII MMIII MMIV MMV MMVI MMVII MMVIII MMIX MMX MMXI MMXII MMXIII MMXIV MMXV MMXVI MMXVII MMXVIII MMXIX 
+russian,MMXX MMXXI MMXXII MMXXIII MMXXIV MMXXV MMXXVI MMXXVII MMXXVIII MMXXIX MMXXX,MMXX MMXXI MMXXII MMXXIII MMXXIV MMXXV MMXXVI MMXXVII MMXXVIII MMXXIX MMXXX diff --git a/test/data/script_samples/inuktitut.csv b/test/data/script_samples/inuktitut.csv new file mode 100644 index 0000000..fbbf045 --- /dev/null +++ b/test/data/script_samples/inuktitut.csv @@ -0,0 +1,20 @@ +inuktitut,ᐂ ᐋ ᐁ ᐊ ᐃ ᐄ ᐅ ᐆ /,aai aa ai a i ii u uu / +inuktitut,ᐯ ᐰ ᐹ ᐸ ᐱ ᐲ ᐳ ᐴ ᐋᑉ /,pai paai paa pa pi pii pu puu aap / +inuktitut,ᑌ ᑍ ᑖ ᑕ ᑎ ᑏ ᑐ ᑑ ᐋᒼ / ,tai taai taa ta ti tii tu tuu aat / +inuktitut,ᑫ ᑬ ᑳ ᑲ ᑭ ᑮ ᑯ ᑰ ᐋᒃ /,kai kaai kaa ka ki kii ku kuu aak / +inuktitut,ᒉ ᒊ ᒑ ᒐ ᒋ ᒌ ᒍ ᒎ ᐋᒡ /,gai gaai gaa ga gi gii gu guu aag / +inuktitut,ᒣ ᒤ ᒫ ᒪ ᒥ ᒦ ᒧ ᒨ ᐋᒻ /,mai maai maa ma mi mii mu muu aam / +inuktitut,ᓀ ᓁ ᓈ ᓇ ᓂ ᓃ ᓄ ᓆ ᐋᓐ /,nai naai naa na ni nii nu nuu aan / +inuktitut,ᓭ ᓮ ᓵ ᓴ ᓯ ᓰ ᓱ ᓲ ᐋᔅ /,sai saai saa sa si sii su suu aas / +inuktitut,ᓓ ᓔ ᓛ ᓚ ᓕ ᓖ ᓗ ᓘ ᐋᓪ /,lai laai laa la li lii lu luu aal / +inuktitut,ᔦ ᔧ ᔮ ᔭ ᔨ ᔩ ᔪ ᔫ ᐋᔾ /,jai jaai jaa ja ji jii ju juu aaj / +inuktitut,ᕓ ᕔ ᕚ ᕙ ᕕ ᕖ ᕗ ᕘ ᐋᕝ /,vai vaai vaa va vi vii vu vuu aav / +inuktitut,ᕃ ᕅ ᕌ ᕋ ᕆ ᕇ ᕈ ᕉ ᐋᕐ /,rai raai raa ra ri rii ru ruu aar / +inuktitut,ᙯ ᕾ ᖄ ᖃ ᕿ ᖀ ᖁ ᖂ ᐋᖅ / ,qai qaai qaa qa qi qii qu quu aaq / +inuktitut,ᙰ ᖎ ᖔ ᖓ ᖏ ᖐ ᖑ ᖒ ᐋᖕ /,ngai ngaai ngaa nga ngi ngii ngu nguu aang / +inuktitut,ᙶ ᙵ ᙱ ᙲ ᙳ ᙴ ᐋᖖ /,nngaa nnga nngi nngii nngu nnguu aanng / +inuktitut,ᖥ ᖤ ᖠ ᖡ ᖢ ᖣ ᐋᖦ /,&aa &a &i &ii &u &uu aa& / +inuktitut,ᕴ ᕹ ᕺ ᕵ ᕶ ᕷ ᕸ ᐋᕻ /,hai ha haa hi hii hu huu aah / +inuktitut,ᕼ ᐋᕼ /,ḥ aaḥ / +inuktitut,ᓈᓃᒼ ᑰᔾᔪᐊᖅ ᐂᓵᒃ ᐃᓄᐃᒼ ᑯᐯᒻᒥᐅᒼ ᐅᖃᐅᓯᖏᒼ,naaniit Kuujjuaq Aaisaak Inuit kupaimmiut uqausingit +inuktitut,ᐃᓄᐃᒼ ᐃᖃᐅᒪᓂᕆᔭᖏᒼ ᐅᓇᓕᒃᓴᔦᒼ ᐃᖃᓗᖕᓃᓐᓂᕆ ᓚᐅᖅᑕᖏᓐᓂ / ᐊᓄᒑᖅ ᐊᕐᓇᖅᑯᖅ [and others] ; ᐃᓄᒃᑎᑑᓕᖅᑎᑕᖅ ᐃᓄᒃᑎᑐᕐᒐᔪᒼᓗ ᐋᒃᑭᒃᓱᖅᑕᒼ ᐋᓕᒃᓯᓇ ᑯᑉᓗ,Inuit iqaumanirijangit unaliksajait iqalungniinniri lauqtanginni / Anugaaq Arnaqkuq [and others] ; inuktituuliqtitaq inuktiturgajutlu aakkiksuqtat Aaliksina Kuplu diff --git a/test/data/script_samples/lepcha.csv 
b/test/data/script_samples/lepcha.csv new file mode 100644 index 0000000..d5b65a8 --- /dev/null +++ b/test/data/script_samples/lepcha.csv @@ -0,0 +1,4 @@ +lepcha,a á â i í u ú e o ó ka kha ga nga ca tsa cha tsha ja za nya tra dra thra ta tha da na pa pha fa ba ma ya ra la va wa sa sha ha kla gla pla fla bla mla hla ṭa ṭha ḍa ʻayuk kak kang kat kan kap kam kara kal kya mya klya kra gra,ᰣ ᰣᰦ ᰣᰶ ᰣᰧ ᰣᰧᰶ ᰣᰪ ᰣᰫ ᰣᰬ ᰣᰨ ᰣᰩ ᰀ ᰂ ᰃ ᰅ ᰆ ᰗ ᰇ ᰘ ᰈ ᰙ ᰉ ᰊᰥ ᰌᰥ ᰋᰥ ᰊ ᰋ ᰌ ᰍ ᰎ ᰐ ᰑ ᰓ ᰕ ᰚ ᰛ ᰜ ᰟ ᰢ ᰠ ᰡ ᰝ ᰁ ᰄ ᰏ ᰒ ᰔ ᰖ ᰞ ᱍ ᱎ ᱏ ᰣᰤᰪᰭ ᰀᰭ ᰀᰴ ᰀᰳ ᰀᰰ ᰀᰱ ᰀᰮ ᰀᰛ ᰀᰯ ᰀᰤ ᰕᰤ ᰁᰤ ᰀᰥ ᰄᰥ +lepcha,ákâ taʻayu ʻayuk ṭe ṭhi ḍi kakyak krya klek katuk jer món úng renjóng hrún míngzat̂,ᰣᰦᰀᰶ ᰊᰣᰤᰪ ᰣᰤᰪᰭ ᱍᰬ ᱎᰧ ᱏᰧ ᰀᰀᰤᰭ ᰀᰥᰤ ᰁᰬᰭ ᰀᰊᰪᰭ ᰈᰬᰲ ᰕᰩᰰ ᰣᰫᰵ ᰛᰬᰰᰈᰩᰵ ᰝᰥᰫᰰ ᰕᰧᰵᰶᰙᰳᰶ +lepcha,Róngríng kakha chomíng Thikúng Mensalóng,ᰛᰩᰵᰛᰧᰵᰶ ᰀᰂ ᰇᰨᰕᰧᰵᰶ ᰋᰧᰀᰫᰵ ᰕᰬᰰᰠᰜᰩᰵ +lepcha,1# hánmúre go áre múng me yangsá áríngre gá chet báshá #2# káyú róngnure múng me yang línbúre káyú muzuká thi zókdok káyú mákre múngre zo me li #3# shenlá múng línbú káyúsá áríngre rangsar̂dom lá káyú,᱁᰿ ᰝᰦᰰᰕᰫᰛᰬ ᰃᰨ ᰣᰦᰛᰬ ᰕᰫᰵ ᰕᰬ ᰚᰴᰠᰦ ᰣᰦᰛᰧᰵᰶᰛᰬ ᰃᰦ ᰇᰬᰳ ᰓᰦᰡᰦ ᰿᱂᰿ ᰀᰦᰚᰫ ᰛᰩᰵᰍᰪᰛᰬ ᰕᰫᰵ ᰕᰬ ᰚᰴ ᰜᰧᰰᰶᰓᰫᰛᰬ ᰀᰦᰚᰫ ᰕᰪᰙᰪᰀᰦ ᰋᰧ ᰙᰩᰭᰌᰨᰭ ᰀᰦᰚᰫ ᰕᰦᰭᰛᰬ ᰕᰫᰵᰛᰬ ᰙᰨ ᰕᰬ ᰜᰧ ᰿᱃᰿ ᰡᰬᰰᰜᰦ ᰕᰫᰵ ᰜᰧᰰᰶᰓᰫ ᰀᰦᰚᰫᰠᰦ ᰣᰦᰛᰧᰵᰶᰛᰬ ᰛᰴᰠᰲᰶᰌᰨᰮ ᰜᰦ ᰀᰦᰚᰫ ᰕᰫᰵ diff --git a/test/data/script_samples/mongolian_cyrillic.csv b/test/data/script_samples/mongolian_cyrillic.csv new file mode 100644 index 0000000..4912e5b --- /dev/null +++ b/test/data/script_samples/mongolian_cyrillic.csv @@ -0,0 +1,8 @@ +mongolian_cyrillic,Ӕ ӕ Ӑ ӑ Ә ә Ӓ ӓ Ӛ ӛ Ԙ ԙ Ӕ ӕ А а Б б Ҿ ҿ Ҽ ҽ Ћ ћ Ӵ ӵ Ӌ ӌ Ч ч Ц̳и ц̳и Ц̳ ц̳ Ԃ ԃ Ԁ ԁ,A͡E a͡e Ặ ặ Ă ă Ä ä A̋ a̋ A̐ a̐ Æ æ A a B b Ćh́ ćh́ Ćh ćh Ć ć C̈h c̈h C̣h c̣h Ch ch Ci ci C c D́ d́ Ḋ ḋ +mongolian_cyrillic, Ԭ ԭ Ԫ ԫ Ӡ ӡ Џ џ Ди ди Д д Ѐ ѐ Є є Ҍ ҍ Э э Ӭ ӭ Ё ё Е̦ е̦ Е е Ѳ ѳ Ф ф Ӻ ӻ Ғ ғ Ѓ ѓ Ґ ґ Ӷ ӷ Ҕ ҕ Г г ,D͡Ch d͡ch D͡Zh d͡zh D͡Z d͡z DŽ dž Di di D d È è Ē ē Ẹ̆ ẹ̆ Ė ė Ẹ̈ ẹ̈ Ë ë E̦ e̦ E e Ḟ ḟ F f Gḩ gḩ Gh gh Ǵ ǵ Ğ ğ G̣ g̣ Ģ ģ G g +mongolian_cyrillic,Ӿ ӿ Ӽ ӽ Һ һ Ӏ ӏ Г г Ѝ ѝ Ӣ ӣ І і Ҋ ҋ Й й Ӥ ӥ Ї ї Ј ј Я я Ѥ ѥ Ѩ ѩ Ѣ ѣ Ѭ ѭ Ю ю И и Ҹ ҹ Ӂ ӂ Ҷ ҷ,H̄ 
h̄ Ḩ ḩ Ḣ ḣ Ḧ ḧ H h Ì ì Ị̄ ị̄ Ī ī Ị̆ ị̆ Ĭ ĭ Ị̈ ị̈ Ï ï I̐ i̐ I͡A i͡a I͡Ē i͡ē I͡Ę i͡ę I͡E i͡e I͡Ǫ i͡ǫ I͡U i͡u I i J̄ j̄ J̆ j̆ Ĵ ĵ +mongolian_cyrillic, Ӝ ӝ Җ җ Ԟ ԟ Ќ ќ Ҳ ҳ Х х Ѯ ѯ К к Љ љ Ԕ ԕ Ĺи ԉи L̦и ӆи Ḷи ԯи Ļи ԓи L̤и ԡи Ли ли L л Ӎ ӎ М м, J̈ j̈ J j K̀ k̀ Ḱ ḱ K͡H k͡h Kh kh K͡S k͡s K k LJ lj Lkḣ lkḣ Ĺi ĺi L̦i l̦i Ḷi ḷi Ļi ļi L̤i l̤i Li li L l Ṃ ṃ M m +mongolian_cyrillic, Њ њ Ҥ ҥ Ԋ ԋ Ԩ ԩ Ԣ ԣ Ӊ ӊ Ӈ ӈ Ң ң № Н н Ѽ ѽ Ѻ ѻ Ѿ ѿ Ѡ ѡ Ӫ ӫ Ө ө Ӧ ӧ Ѹ ѹ О о,NJ nj ŃG̀ ńg̀ Ń ń N̦ n̦ Ņ ņ Ṇ͡G ṇ͡g Ņ͡G ņ͡g N͡G n͡g No̲ N n Õ õ Ọ̄ ọ̄ Ō͡T ō͡t Ō̤ ō̤ Ō ō Ȯ ȯ Ö ö O͡U o͡u O o +mongolian_cyrillic, Ҧ ҧ Ԥ ԥ Ѱ ѱ П п Ҡ ҡ Ҁ ҁ Ҟ ҟ Ӄ ӄ Ԛ ԛ Қ қ Ԗ ԗ Ҏ ҏ Р р Щ щ Ԧ ԧ Ш ш Ԍ ԍ Ѕ ѕ С с Ҫ ҫ, Ph ph P̣ p̣ P͡S p͡s P p Q̀ q̀ Q̂ q̂ Q̄ q̄ Q̇ q̇ Q̈ q̈ Q q Rkḣ rkḣ R̆ r̆ R r Shch shch Sḥ sḥ Sh sh Ś ś Ṡ ṡ S s Th th +mongolian_cyrillic, Ԏ ԏ Ҭ ҭ Ҙ ҙ Ҵ ҵ Ц ц Т т Ұ ұ Ӯ ӯ Ў ў Ү ү Ӱ ӱ Ӳ ӳ У у V́и в́и V̈и ѷи Ви Ви ви V в Ԝ ԝ Ҩ ҩ, T́ t́ Ţ ţ T͡H t͡h T͡Ṡ t͡ṡ T͡S t͡s T t Û û Ū ū Ŭ ŭ U̇ u̇ Ü ü Ű ű U u V́i v́i V̈i v̈i Vi Vi vi V v Ẅ ẅ W w +mongolian_cyrillic, Х̳о X х̳ Ѵ ѵ Ӹ ӹ Ы ы Ж ж Ԅ ԅ Ԑ ԑ Ԇ ԇ З з Ђ ђ Ь ь Ъ ъ « »,Xo X x Ẏ ẏ Ÿ ÿ Y y Zh zh Ź ź Ż ż Z̧ z̧ Z z Đ đ ʹ̳ ʹ ʺ̳ ʺ << >> diff --git a/test/data/script_samples/mongolian_mongol_bichig.csv b/test/data/script_samples/mongolian_mongol_bichig.csv new file mode 100644 index 0000000..67c871e --- /dev/null +++ b/test/data/script_samples/mongolian_mongol_bichig.csv @@ -0,0 +1,6 @@ +mongolian_mongol_bichig,ᠠ ᠎ᠠ ᠎ᠠ ᠡ ᠎ᠡ ᠎ᠡ ᠧ ᠢ  ᠢ ᠣ ᠤ  ᠤ ᠥ ᠦ  ᠦ ᠨ ᠩ ᠨᠭ ᠬ ᠭ ᠭᠠ ᠭᠣ ᠭᠤ ᠭ ᠭᠡ ᠭᠧ ᠭᠢ ᠭᠥ ᠭᠦ,a -a _a e -e _e ė i -I o u -u ȯ u̇ -u̇ n ng nġ q ġ ga go gu g ge gė gi gȯ gu̇ +mongolian_mongol_bichig,ᠬᠠ ᠬᠣ ᠬᠤ ᠭ ᠭᠠ ᠭᠣ ᠭᠤ ᠭ ᠭᠡ ᠭᠧ ᠭᠢ ᠭᠥ ᠭᠦ ᠨᠠᠭ ᠨᠡᠭ ᠨᠧᠭ ᠨᠢᠭ ᠨᠣᠭ ᠨᠥᠭ ᠨᠤᠭ ᠨᠦᠭ,qa qo qu ġ ga go gu g ge gė gi gȯ gu̇ naġ neg nėg nig noġ nȯg nuġ nu̇g +mongolian_mongol_bichig,ᠪ ᠫ ᠹ ᠰ ᠱ ᠲ ᠼ ᠳ ᠯ ᠮ ᠴ ᠽ ᠵ ᠶ ᠬᠡ ᠬᠧ ᠬᠢ ᠬᠥ ᠬᠦ ᠺᠠ ᠺᠡ ᠺᠢ ᠬᠣ ᠬᠥ ᠬᠤ ᠬᠦ ᠻ ᠷ ᠸ ᠸ ᠾ ᡂ ᡀ ᡁ ᠿ,b p f s ś t tṡ d l m c z j y ke kė ki kȯ ku̇ k̇a k̇e k̇I ko kȯ ku ku̇ kh r v w h ḣ lh zh zr +mongolian_mongol_bichig,᠐ ᠑ ᠒ ᠓ ᠔ ᠕ ᠖ ᠗ ᠘ ᠙ /,0 1 2 3 4 5 6 7 8 9 / 
+mongolian_mongol_bichig,ᠮᠣᠩᠭᠣᠯ ᠤᠨ ᠪᠢᠴᠢᠭ ᠦᠨ ᠰᠠᠷ᠎ᠠ ᠶᠢᠨ ᠨᠡᠷ᠎ᠡ ᠶᠢᠨ ᠰᠠᠷ᠎ᠠ ᠨᠡᠷ᠎ᠡ,mongġol-un bicig-u̇n sar-a-yin ner-e-yin sar_a ner_e +mongolian_mongol_bichig,ᠪ᠊ ᠨᠠᠰᠤᠨᠪᠠᠲᠤ ᠳ᠋ᠣᠯᠯᠠᠷ ᠮᠠᠲ᠋ᠧᠷᠢᠶᠠᠯ ᠳᠡᠳ᠋ ᠡᠳ᠋ ᠠᠪᠢᠰᠢᠭ᠌ ᠠᠩᠭ᠍ᠯᠢ ᠨᠣᠮ ᠠᠴᠠ᠂ ᠭᠡᠷ ᠡᠴᠡ ᠪ᠊ ᠳ᠊ ᠨᠠ᠊ ᠳᠣᠺᠲ᠋ᠤᠷ,"b. nasunbatu d'ollar mat'ėriyal ded' ed' abisig"" angg`li nom-aca, ger-ece b. d. na. dok̇t'ur" diff --git a/test/data/script_samples/new_csv_files_2025-01-29.zip b/test/data/script_samples/new_csv_files_2025-01-29.zip new file mode 100644 index 0000000000000000000000000000000000000000..6d24137f0c51bf53773220398198d7a9e927be81 GIT binary patch literal 34749 zcmeFZWpo_Lwyi5>W@biq$mRlh6V%$_`NdX(*^p=4;l~=kcp)k;9+lI;^k~*Yinh~ zVB+HL<_QY~4EBEFABCDKJP>44ijUQMP$&CXnI*2vqZ*-!UrBj}eFy3UGj56b$|)%9 zAX`(!Qczg1P^=qZtP>Xw$xBERNqH?OR8qc?MEvCLiDb~{ALKz{Q`hD3HafwP^wiX# zCBN6ZGV1KdTcbMXO!y_HJD=+wz00jS;<7-kIeJrceg&^JJY@dG5?-f|`-F`>=&}%j zN9AIoa5{Fo<(`pZc~*@~dTtinCqPvPlVhkFpa z!n74M@2eHH`>6&DYm<4)*#-!hxUOJ~`tg(4ZDv!wp`{s#3vTUx`+UR|HrAeTj(Z@# zZJ#jyXX=JH6}SUzNh->VW7hU+DLk6A7GZ}3(jy=X&Jit`^=}_FA{Nj~vuPjL9;`;x z?pnl&vxwo?5Bsjk+4Ra2A#kvHbMx!2z6jDJ4RMw89eLzt% zbL_xxuax}^kzp8)GeFM3jAFbj5~FBg!*>qWu=sr|#5mWu-&8n3>UOu1%;w?Y^+(c$ zU#9ZCHN5K_0w5YIj6!OpK~rJGPyr8hiYgm~it^P~lH4z>1g(m+p2ASh%YLNtM9 z5#_qUVE4IWGJ}}c=&xURGQh?vpIUvqOMSe*^yKtRZle}lk>8DqQbv}e4)w|yVo(9Q z^zzmNb%LaR60=_BnPqN9-(kCk&;%dr#i>W=1ZV#QQxDw<+WLv+nqURO{gYlk#040O? 
z;)x%mkzaxb38cnQS`kw7X-3}~NA(?lk!iK6ErvpS234c#BbT`Psj}(qNWVN5rqmo6 zPBen?Bss{9lL$S{nPoTx1vZ)x{D3U5wW>7U3}i#bT4sCtkYdk9E%{`QqH2}KoZdd1 zWtanj5HmZD#TqAc_qW26$tk9G9{!6|+X?$-L5FMHr2$E>JKCuv&DTUm)IFf+G@Aw1 z#Yl?H2>BuCX9XWf+yaut^Ex#j8}gj#@76ArA=IlmE~zH&)5E_^2IqP%_gX2c_y2aglpF1nc1@3(Yw}9VT;*~`#4tSa{hHv zmD$0!oTV3C{OXk6ntAp%#e0)x9YSl0&;~L!`1Gf{n&~m-`8Z?txAE@Q3u8w&kC_)m zyR0C-tpvh~JS3?MCM}G$x}Ahn(1h=m-4cs^k7S_qynN_p5@gG?Z~E2 z8aGNGR1LP5gxgr!Z9NdXM)u@<1%>+V>A=nJyAZ|bKnB#o)a;z*H zY3lBphoOMVcJw?UeCMtbAEtp26bL9B;;-E4>i*6h*1x#3GIBMrbFjB?ur)BYGO@I> z_$LUN0PwwsdX$e>v_^>NO#v3 z7Pc~pp{=YlAe(|5M(n*9oL8F*tSKbK#U5Mud45;3T|IFMw?>;Ple*;PUjp*ReL@nC zwWZLABXXz1H5Xh86`HSY+%%%dtWoiqcvL$?6($H4>5X;9$#9h6h@7bi3CHdVcyl0( zk^PYAk@b-IWct)cs5$HL5AZn4aIXy+i3Oo}LR~ zhRP5eu|B$SK*V8HL@&hC>TYqCm22V*>_66J~eHS zc8~2Q)KjLDbPJU*JWjK@c88{Rm)m}a!hVJ#5BFzKnpMWNXRYlM$w9;uh3d=4BM6ZzB5}DH095LtgaU zq_%Xym|MUn2010wrY>K?$Jw0s9SwRh{2aK>ZVi|IKZ118V<&gi3+LNamdIw~0zcO)KTD@~nL3@RCn{2yvGKa=<+ z^#ic?X;7d}UnGw2unA)%2wXWR_N~zt;!-WDluA%izPh6NiMGv9A{0%Zuxm?r%wW1= z`ib3Pt@or|!*5D#&w?%pBP!wY3FAK$FnC9Q%%?n|^p5;kU||VLZ!}d%VNSsJHCu%7 z!||-l1o-`!5kuj!$V8xs172xBD!w^#4d(>;J%$T**MP3j6_ds+7~!_J zvg^}z^?6BnJ7}mAj`8z&g^>y|Aa0R^7rUHZalRw2l14Myw~#dyAvL9Wn)EF=-|?$> z#=T8N#)!Ma2$fyLmQu7RAD+z1+Uzl7Ka88NFWUK>gz~utW#g;47CGxo8XG%VGtI_| zP3bZwJupXrSAdUB5p80>YEfKVV`ml0+3v3fcMYG6Z1=46VE)Y0kdXKY`ksCz3)8z? 
zfCDM`&4En+E&VY6N&i1-A0;5`@9{Cx$Ljyj`u{hxp0E8o^zeUV-PXd$*~-ZNpAvr& z?ql^HR)CO;Uzb-A0^knlTwDP^qkvc0RT{6h;OPnG~0TI+aoN~i3-e!YRB1Jk^x>r*$gb5}XO?y6=JrZs;E3AD;#GXZ% zFz)k!$uss)k$6NnGt;t{Ad>=mUx6HDl=_|L8pUWYTsG85+r5Alc@${p>eEm`o=$MA?2EeqD7_)*#4k51t~W9}T@ zfLPnBG$Lz7Rj+BRZHL>v@>%1rg#><^Zh!0dB2Cj|YoRgvDr~`^A@6cHxVNxZ%q&m4 zPnIU@LD{lK>IVQWTSFD;cQdy7oZ0L2^Gj7>+28T+3q)>qOOY-DX< zYUXZaZ}gtwf6A{&sE?H>AbLUoV5hDt<4T~3ZR;KSpK$b)M{>F|yy0jBfq@2*(a}{- zBq8>HVvOLy47wU#ik96s<=*Yf zG3FG94sG}Tv9Cin6ODbllGl1nq}?O_Bf=|#u1%Z7kU=^D@fkh+cF<)}=^K!DxG$8E z-i;(WA+>wKS1J>$8?e}tL(BsZnu4+!7E8ouP;Z2;UYyQaPw1{deG!NJwi*6=f@dJV zVE&2=3H(aB%a}+u7AD+Yeh%j=2G2AL#g;s^m#xX&N|VwjqAG+1j;`~Q4?(aT7@ayp z=~ix^bkWn*bhh-WL%GqIcHLEu`S3HHwq(@wKFP2!_{DX@ERyS{PkD31z*c+I`HKE~{f8?*KYdb-*Rp$BUTx;lv_?d;^swFhgdz6~_{`HQ(nxkn~FZa{|ck<@{9 zEp|(n^{oSIdP~C!Sg%&hA1yoY3FiH}Ep>f*@8eHrIpw;2(@vQT1~txZ60G+%5Hes& z6HlqV<(tT&DdoqX1>Ly+4# zXL7qZ8sZ$(WhDhK+R0~KUU`m>`=MYMght;`+BFYhbs>VN(S2)Pu z5@;K2q<=~vyc7*iQ8iFXUxcE()Gg%vXupZ!`PS`bv7z-N=Vj~X&PUhdjpVFM=dz7a z)L$U)5<|7QuLuITyHo`8*Urm>TRXPsVUeVcVWw-r!o7B#jaO=lfS>^kVC` zw_p78Ecr!mIB$G!tm}5}{+6xkKNAhEyWCB5HyrnGOK~xIu63^U9_>Ca*~dopF!6~m z=xBF?Zi>nMByc5sg6jXW7eK?Wd?AFWG(5M5j3+Z5Hv+CI99KXIP!q^|H1AM0PrVyF zZ{V(AeWAJn&=dH33~$ITjnz1AWvvZD%yV;N_FzBDgDt&lhUxq>fUb1v^s0T8y)#nB zuXONgD;~gO*Z}nOaE4{8L=Gx@tJfZC4PsoWjQhWpPWf=N?sukDegR4|*L^1NL$4hF z{2}LuJqG4np$Fzowg>)|dY4WG80Z;o<_(Z>Y4-tUl@cE4Fx=C|Dhp-Ll8j|3yB+G< zr@$ABVuRkRavRb*5djeaA)Oiad)#~4d)OsBVSf^X&|0FRn-can96SSCC-m;+>nV-% z8HnanUmf@XLBAdC+7N44+O^skHofJc&M2?8wZPWRUU)@9L-yY4AVkoDz=w*;n0bC2 zcUvbDUEgSR`z$?{4*OQKRx9^Wf=AOP!pYi9J*Ey@nKx#h*246ftZ(VxGWnQsGvFuL zU8@e)V)430X2W>dNgQ_jC8k?2xh^d^ZW$Qptwo%_N!HSjHt1$0+Gsf2$!t}Oaay*N zwU<3DIN$?m1{p^h>ez*~=v04ebkAxTMTPLslqJZd>l$!PW6WH*WZJaw{vb8axNYJ+ zf_anT?OVKdkV0A-SYhftaUX1z_pUSDUbnRT7T$gL|56#yKXhh`sVlGe6w#kmii(3b zme7`@1=if#UQ7Wp^(e%2zNET#RjZVXzzk}j-L%!*bIX=~Qkjjw3988Rvlf+RgIJNd zU!6u6#nt!p)C+R~godSPk!VhkpL@=DuHBq<1Gad3zDi-%uCvt^OH4X!|C_?N{2?^I z3CH~ZD}`bGZwgcD2n{=0`X7ZM|3hI?|3P8sZs14H%90kx_ev4QM{VQ6yqZ=`E3Dt8 
z1xvJ#2nIr_oEQeWuwHo?zA%1Fi%%e=7{#Z2@RGX0LHdC^X1a5^WN36Lk`KP@c*x z^yS*+9{sjO`&$FQ)HgLw8-}%Fu@+Ywhb6FG^!H^Z{|LZ^5myODkfMp^F2EQty`UoG z!AaJ`<~bm9w}fq6?~VPKNZQePLDNS8bEIY(CjVAoyHkh}Nb%ZTp{dVIV<=M__)Crc zCbb$O=LbChhsT*6(x5naex}ET9hRVJczfK943n!ZD7Rxzh9S1k7LMD7(}vqSutU(A z{yut_;jF78X30v#RIMYmG)3ae$wrt9B(RHLtH&gVj3`^FQ+GcYm?lx7PM65~0%wY; zvE~0#h+<;JIG)={=rakTbm49D*MLxf?`D4--q8uVf(yMX+)mQb(r-HuV60-3q#!L4 z4jHkfLUpJ?&G0qdflybUREmg>9LajR=^xrsmEh_l2=haL)DD15hhu1A|0JvnY+#@=y#WP0vZ;9&fCVRswe1D}wm zZHh^s@K@Wnc?;sJ?D|gh^B8l(p-Xe+V%&je|K7>f$2%2!E zU0fh-++*TmaXdg@`S~Z9y+`xvqp{)P%we#amh`bDsI&lg_TK0h7c0qja#C z#vOgtU#ftzSB^|@yC;Hs&8r77isCTwLg)(+`Y{#_qavHc7MBW{ZH-E#Dw15KbYpPH z*2#Ft6koa_C&M5-;(o!d0j(k7@Ls$kH#0RuHw!pIvp@E__nWV!<}Xj+`4+41lB!+C z%5@kYS48>YFa*L%6RF*Z@}f@;X4l zcVYkPA1wdI@*jg9|1-ve0DRvgKL{|c^*4-@a}ncAKtTG5N0KGmZD8Q7l3FF{xd=Je zsKHR25`r?>8F#{V;Gbb~avGMKf(EC4`8S;pJ;W}*{di%@U@*%TW?;J0X1S`%^ztEl zA%i3fBaY}evmyETIf%HmgDSkph@ru{4!?YHeo2J1B0Wj{=v{3U1UXT{+9nFBybh2Kqe8U z$-cbA@dE@-TmZKlm5HFdv7pX^5h!R*VwH$cm~?$a;64p^{yb(fym$FfKJ+n%S`_Tf zp2I2bjz61+hYV)m1z)(TN9=LvJ=hO$L@=K~ zzdp=RRLAqXb8n{RQE>*7Xepr_G4~;?`3`G1o_q`~Dqi!+aMf&glPqIW-6I)W6qCL7 z=mUN-~yJCIc&RmctNYgW3u9E|CH(G#GgEcPQ8C%vKwk8uFNR%^5lg9-I~1i=>H0 zSEg3niA+eC-WWUL*4;1}DBEFaTY#l<_(X5k+Jli|5Ovll7P3?bV{4RP^-ixh2AgE7 zl)Gg9g&$%jB0K@!FU)4*W@J{+jwcgKR7;JSw_f6v9Y|W}^K=h1Ke?lwvKAPD8gHZ9 zW}+vMkd?AsB2Gj2;>z4gi?ep`0{u#jvlj27txAux2yJBZX4t^k=$}U}g8=Xb145G6>A$tb0EiV(Gd_J>1Q8Y+-Tn{UY#R6ob2ewfXh(^K+ssCUa_%j+AzZtMM6I z>6QW0_XjBiNd&1_$yh0QNqMO$$tfuY$xJeOGy%L-3K+(dc)(XkMi8OkEHy`o9j}`J z_ud@K)k;zP2jyD@g|0|vo$HtGh+sQ#H6shNca)*5mDQOH&EWwkCjd|m{uAYpf1_+h zW&DnER+a4-;G7>mkm%o4jzOFe9W8xG+EgLi*=BE= zJ<>~GU7cBkhMkhoc6a-_xTRdi)yE$OiV&t90paGhpz?z$3>BdaZ_3=9>v$6rFB4B2 zg4fX=7s(ya7MB!#>F~9U3;NoJc-1Sc*rt}Ue40<0XOvnpP?TqwqQ!0=qxEf08g6%A z2pBhQPmduinVrGgQ}ndAmhcZc#K3bL7(idKhsS za##bfHnM?By5qlb?kd0chTgccY`Zz^9V-}zu8JIH zP6|Yd2Lfrw2C~}3@a>DFjMRPt#zNHWDFi$23t1CgPTV(&rn6QMN0akGcHaW%fJx0r zdh@hn{80FU*nS9fqoB$rp~IzoZd=0&sft8^NyJ%{beD9O%76R=XL~{b&iu{5te#s> 
zB9^34mN`xgI6y2P7&t2(ns-+^Gin;W7Kb&xy=#RC!UShhT4% zJ5lO}aBtK*Q56OgHMO;MAOC2K&5k4)9RM^R|3>q)((g2*186q#Ffz5Y`oHMTYk2us zX#xT)(A&yQecTyU0&P=eNMHjWSB)$FM$81_Bt;L4ivis zTqbas$7&}Yg1`*WkznQTbY8+FT9WLJ)f=MNN&(+Qe@%bQ{>ZM`nKf|1m>d@GEOWurtLNGO}TnW@GXT%zg>6|-C-*US}atjml?6p$`#iI zdI)Aypivsk!AMUO&SRL6*6^79d8M@*-JY!B8b*viurx?Gn9$L0&m3PZT-G|_$F z5z)_wnL|X;1STMU7bgi)j1RnP)OI?MOF#<#hzIDe2nO0-;E(W+=qo_%0q4DJJ2XcZ zpbq(rK$r!y&$`}$_gw?KOC3&ho@2Yq2u~z33oEF(#gq8^-KZu-XD0T84=$1V&k%aJ=q&0k;{eosl74B{#gV zUfEmCZTCC^48R0F8uAOD+up)*wK+_u&;(R+`AzZ7&kK7zvpyg1$h5Q`tkqtAi6K01 zaj#iVuZnKOWgT;vqgn^s25|^*2pa0(8sjhI{gkX@ybXNWkJ=5n5w->SfzgKuD$aDh zS7aqUlMlqi`VE^K)Om;L8f*pl4J;RA`UB*+*>nJAugnhmHO>n77Rc47AH9$P9~Zg> z52vVfl#EM&PgKa=T>F$VfrjjrFSslBY0OBx8L1zFm+@Q7(g#9tkMZeEWC!sDF|jL+ z2bAit#^S+4*3~LHm{mnKV`^o4$#K%ENoD%Fdy<1XhZ z=NaOz7xFtBxGFj;vfg_Ea-0zoYVaOx8|a?2k8Yd}xP>pVkzP(&N?A%~I0ZFmewI3| zoI@`e=RRaC0R@PXn3al^3_z3=t@4{`a2Zl!i!CK0)r$)--$luf<{Lnia1z}eBdGbL zN~kMb0itv&LlYxa!eZ6*E=n4%8uv<91X42u;OHo|TcuuY6YI=ZtzP?-_h{imj|Ddi zJf)Xw!X{U_6)&X((TfLr>!k#1!mY*Gq_en;T&yV>V{&}#-v<|BE6Pf9SI71RZrUp_ zOZ79o&CXf?O64He#qxb?A*>Q z3Mr^vlH0;j--=6p7TyAX@I$sU4YP~uT)pxFI z|Mje1?9YRq%zr!WuLuAA%75C$iw(GJ{m=b0VU&F~CVkX>6XRD=V_&|x;*{iE^5L|I z8<&JVJh^CtST*ITrNKvLrf%8jw*PEBWuhI{VG_p5NK@wfyB}Z^T(e%hN>(Lj7xBC% z6PEu~JTXh~AB`}kb{~yM@KB!*3?!T294IOC! 
zcUkq#qcNCWc1`RqP`RhiDZM)Jc@8MGw2qXYDahOPPF~PW;l5k2xzB9*g&oGv1`s8e zOS;+njMEGj3yU`$LV*XycTt+E95QACh!Wv%qNMvbQF?bt##WYq!PNf?mlOcdqxbmi z4j5NeAGODoKm6$e|GnD8teiU8f z8HUCfY5W3tMCv(3-V?8($Hyir40`w6j`%QjrR~%Db?_CCM?oCW#P}t2Kj|ugBO!aKIbv^m zW>47ECp=FmUXX`??SQYns1tq8s{pTbjpqs635M`Vqt|8!{#res(2Y#7U0`Y{Mbiq( zTSEpv$0eCU&?z1bGB(vQg&El|9WAV1CzhL;Zl^z{DR$`t#U4UI6r5Vaw{nAM(~-db zZ(kPpu+IuE#WW~(P-0}35=wR$_J!L|ED11G*Jr(WkIN4&7iXpWuy*Rghe6{b9KW_i zDPi#qgFP>dqR#7IfH$kGwL;qh<-QS&(muj>H{a4+qh6ENgWH1kfb;+fm`o90L*9$+ zps;G^$(+}Hy0LB=-|5OE)u;vqSfTb(q|kutPdE#o)_bEV*qF*>^cM6MHjQCdN`c{b z5EbDSTdv)JI4$$@SJK}O>us8B_AmVMeRWPF=hRB*OXNympL-!|J}Q2xc9l9Ulz&n= z)s$1$)>u5<}DYn)d!k8z`p}djsu~<*nSXxH;REkDU zib-!RQQAP+N_LIr_CxqO>L%*X1dcc?%cww@8+C`Tow!HeGCk%y&!G80jFtFQ<+{wy zTIp520A7jBtxh>9>MG-l1MF&|bGT@owzKIUCdu+Fp~6okPL8i3rApwYqQasyH_O}T ztfT@|rB1es{rm93y0qNKry}XTS}r`{6>;I?#e3v{+8`3@&afRmQ9D(n@VUd@A@|tB4}kXGTyckOel3VM1pQg>ZL9 zP;NS*#;*DmWedvMYA_b2NKyxL)2gC6Qi}gzySKz*g$MYfmT07D}=zP(u`>@rqMtM)|C!RNtc~9;qW-!ljPd_;` z0hd!~MY#K%%&MF7C2GOpK_})H_B=smPEOX#CEV=A>1zAD#nGEi9`DT`dgO!Vh)Mt7 zOwwnm-+OHc?|NirYvk3>@*+(~hGD2)N@Z0KMct$8`Y=O{_AEX32=MT{d|2DcsGfdw zaYB1;!qyV+CZZkDm?GMa_Zgw^8NbkSCwz*nCJ~6Yft*99^m3FGK;;wtNE{bWFegm! 
zP=vuR{yc(e8ihK-ev9@L!z=PeG@Z~h0&5DXB~oYJ_=smTH|2sI_i0~uhw6AlrR>95 zO^SFILx@pZvLa~-#4dkGthIP<(OUgEIa)E=%tl#h5sn;85V&v^NWoL=Q|x5wB!*b& zw-pvy$AqK_<-$H{2B%D};jah+=9n#z-Vyo|OS8BmDAqrGLKukZa_%W^hoy8zbSUje zapl(NO>_uh6QD^*=%hKb58V1$6g++{T-ur9cv#%Enf{ zdzrkR&ia)(2=;JQ<9rr-{Zi(07+cc!ImlTn_semi1=5tYv6lW9WU#}zoaFgi-3_OT zxSqh*_{ey@FHEK`9I3a}ogh9$hv+Fshb5nh8GSQwg#h<&O;l^g?v~u~vO0<+W!-*J zDA@l3t)y(Ii4DwLVVm77uIeWv+{u*+w!zYptO+(XUS~26$`xEhTL@6hEjE2g+^QuR z6_m+JTd@^wccp)0+WR(aIw#FOA19z4rT$c#J*)a8igRI}^s{-C-?lN2ZB;_<;f~_v z^QgTPm(YcNvutNVUks4DcztP~J^QLc*2MtDi&m1~f~kN*V_en+hNcV0E0OFaSELE` zrNN&3klf%;osEpo27_`#lY_+j0wQNqPeHLS83GE~%5r&Sab2&MQ~~~3*L%daeqd1q z`~HL@lsY;(jZZs$pt|gY5j5;@Mt+SHN{J>|K|r-l+S4`@PFzXflZa)oAR!M6J5fAN zB9?k>dn*y%KDmRt=ZGZK&X$)FAz_^VG{V*3zm_8?onv}?>z;HNPf4alQD!jgjp$7b zc}V6-yv^Y;*OA+R9v2+55r+v0cq^jNiNyCTSi1*lE&5jK7?*Pr0b2**(BCooEoNg& zlboFOMi2(I(Ztrs*r>Kh7)h5xd#(d|KYAey87e@e8}f(X&{@8#ZD^68vU|C*PrHWo z60|NABkQ6)*%?*r@CTH3Z_qvRfe)d+vLl*{I1-LVV#T}A8%Ukz;F8=VT&lasMrU?h zb{T!S_Y?V1tZ;*96;fm|va=C84b+{>lwtP)#l?T_lG(9hf=#2gCqc+;!+2n9H2;0?YH`^#cfuE{fIAhvs=>TgX`R*6T$RC|NR1BiF(|O?#T6iUR+< z^vTpjDs+XIJ6?Px@{gMML@OrSNt%JRI?x5ZXp)yg8)8v>`K9(@du-W5K$^{NYlpws z6UNfB_9MxVWNqF_q!wm1d9I#_gV+WQOpHBDf9Oh(rG(N<3nOV$kHgK!83j3^`*YjdF52w4@x13MlvU@#Hy-%1*Id~?35Gw_3J1?VjC+b5`mJvLMOtGw1}qT5kk>cdxr*-5sec=%=*N##!{hgl7_<_tA1epnaj zWNBwu%Ja5QD1bayIjRslU@SE8Sy984j0j2N3yq^Y431qyQ-OioXD-?C?a!*8oosl> zwEK?aL9nU|svL|jYuady)mmUSYB>pn4M#NYB_V;G6UUY}T!o*QrDf}C&L>gvL|#EP z2)heTcX{Lm4W3jAK|p(MQ9+VW?6X~X_s%0qr+G1|o#H30!ETbA7QIVbic0jw-m^`~#jc87Z=>1oC5_?o(Hy0rmh) zvyDxeO`a#CpR=GtHDc)t;>g*2EjXRrqt|yb#qk#$$sA?Nl5d~&0@#Om-jd|Hfg<^0 zY)7&`ZC$gJzH#0gSh^nW$Gt(m_Za(s_0md00s)O9{B@8!Ka}^st>Od>j#=7RIsdP# zIC(?7t=?ll$lGc%X%n!D6Df3B>T}pJ$Y5k4S8Qn!<6!==7a#mW?1>!rH@nMd3=ABY)$7{{hv8)(~xr zbxEw;J4p;X!oVDtoZu1K5yUG&GXk4sAf`{UouDy-c))gx>J@ZB9Q8SCq;mI?gnU4b zL~H}A(fBh+p@{?W_~-SJX#5Ht`+fI#w++4n&`bWt&@AC2l5Udec!yw_o(4Q7|A~I& z?_OtX6QUC+>QGe+G7B2e98et4%Op$C1E%?~2pQz`mNJRN%8*0Ft4IUp)S;(D(_(qJ 
zu4k2`mGa^%+ckC-4F(O`0Ss}qpJqQXh+FZ{F%Bb+i>)C<-cXsvDu=%9FE~(_j5Zas zbWwq)x_b6r6IN(#&uR?u?Ez;1DQG~eeyapl3!|x%KIM9L?4_=C%sr2R_x}a?>k{{| zPQSBZ7xTS}QA}?-00ZnF7@#BY=i<@%kl(A=A5*=JY;BDG_Zrp>s9Eok2pG7*k6o2d zQUa_Kmh=7BI^l*G6T0*n+&h&h|9S4>5{g4A&5A@(BurVNnqpnjbmNQW|vs3ig)AXK`VFu0B>z?jVE31lFov+ODDngm1)W)8)B%_ zK~vVR&%7Kfs141Expkg4G*dQgINdxOgA#?|!){e;7lmb0S3=E~6#`L&{N%D^<|Jif z^Q+un?|F5!1ZwdBSbNgntbJ$j?|FrLxArbBM*q{Q;FbVytM|wRgj=eXJgyK@=yt+q zkse~P)DZ&bY^K)w8xWkYlAR$dePsEA%Ek{v=Wu>zb}En37R#It(pgRfZd+qqMB#PFm)L6C?fB#dxUER&!n;3nuK@Fe&k|D0{g-`l|G+Qv*7 z@tUt3GjrA5?QP|(vC=x~WvjxIZ7e+8Y&HqDQ3>pp99wu6k1Az?w3-%5P)N?(GQz*jS5*$aWyOxpknz}k5TI8-4l_S7qq&4WYGKCy50##!YRlp$|=k# z&MCwx#wo%n@mbK4q#3Ax#nNtKF!Ykh71@mg+Be43>>6v%V!;zDbIMrzC5 zAjW>8DNz$HRJVF_zX21c4AVIr4Yhjn`HYhwKbza7nJuTK7^TMH#S_2g$L-5iXUg(k z{(j(o;(p|Qrhe#t>VE8gu6}S~)B@>PCBuspjX&^}67S82!&49n3lIasx^3oT2PQZpg{2@co}Fa!F9jVS%y zulL9hP$I#|{aF6vE2ebAY@;suR!jJxaeZfvv|0K(gT7~Imt&+4~MZ< z9T?+QBWifb4LQRQLQP1FZe!sf9QXCo)BOV$rUW>yTT{+s|9#^{;}~rdbyYQ)6xwlP zB^?`CByP&7@THZAaI1Ad^cd{HsyhA2#J{G!OqNYHciSTpG-ixY_6fTYH)a`bULZXt zOiC>2-a;4-Dx0>u-{iSe7Hm*2#=|eg%Pq&tEyl+z?dX-1(9B_+4wh3ik~Y!`uXDkb z2dkcdLHR4T7tWOl|FGaJW-eqdy^Z?fSU|apZbjmfo7khT<=vcmI$LQ$LCKOhE)(vX z$*mN|me}ZBnOkvu7UEr@U$J}^qpj?)h}x2mfpK*F_Q$Mu1kK%NUcf;4&2KYM6aPl# z`@dx{a<*_VHa0RabG5W`aQx@4w<&)e0RqwlObD--68K>diM4KL^9@$sB-Z?LjE@Tcm%!{i`SR=wjK@D_+%vhtsWI@+WpFThdz(NHTm}9#(?wBgd(Mujv z+Wi#aJst&jPS7Pu8%1|+w;NY2bG`R3KX)W9k1Q}QhoT8=EVz@(vwigKPqLzdJk0-) zjKg!7H0+N#t>zr_DpZ?%yeITlS|Wd6=|vBEQ08l~UA z|J*|B7|A~3;9Wc~N!%!EqeO0ESL+QQ4k5fLTRdPUTcV=gEP$rMTXk&8c;YJrznEl1mhdE2 zNPgM1f+@hwEk*hwGCB9nbWExZCyR!;^w5Hr68<)@lkDXH-70Ozy$VbwKox{~NI9Ye zO^Q4bg;xzp`dU?pDkQVypm?vic$h|PxcN1D`2gN35!6|73&kLizoM$BybBddLdk~` zDYo!Io(S)IibsE0QTMI>LaGD7ABDHnGkDozatB3$8pUd|5!~ zyx@RU{x0i`BaS5}{I(-b_b#jNfYoE@Jd~VB!5{mP#By6CaQ>`e{rs!ei30(2;LLdA z%5(z>bYjeKJ~B`P^{v4l@T^mfC}reB%_>HcS_SrL+Qr|9mPg^8lj^}+pSQ2W=Feb+ z)ZoxX#1xT}x$dR%CQ1}Mnc)xf@z6vQEc7;q|5LHB&u)MADcrshDZZ5sju|9NK1unV 
z{?dHa((4j{qJM)cc~x@~`$!OeOzb5}kVI)3$4hSOT@%ZJbV8C6pm}719d62>2#l2} zf%PfJD>y*0363+vgPG_~EZ3C}w{8@7jtuf3zLVURR`y5CRBVLc5@<%a8DDCIIr!AWkEsEzuDHKmjqCSePb;?B;okMmUu+_L)tWC77%69<_Iq)l28g!!pe_r1ng86G<4q;Kke>fV7J;B+)`hmsgN4ETC9hL$kF6XKD+}(HE4a z$jy(HKCvVhn$6SEWMwk&{N?Lz`3*Fl~88epq2b7eu(AI|EUUI zOC=V(WN{-Vx4{<)WrK`8+BQ#e5RJ$qrgXRsvf_=%qhuF$M6&&B5dEg*S`<09oEgb6 zBnoTCF@Lhyy}Ya)(@hzFD(G#T5nDo;cq-{_Xn|7z>0XNLEj&55Z@MNGCyVJh)~tt< zb%4etHx*CsocfrW(V}fmSn7sRfH=<`H4|o)BX)9^#C@)km$#smcWsI@Cd5S*PBQJi z&el4LP>~RQaDaE?Bfhl8^1RUStk!m>mQ$(VVnr}n@TB5#CYMvem_dJAd_M45?lPXc zUB-QsZ*N(0Nt={h*M%19FNObT&74ZCGfB9r<&M|+&yY0|4@KDng+o6l^(awG8?6z@u_IZ5KDXr?ILw9rg@a&C2 zvEB*?i@^wF<4uxpI}VjdE&K zt^1dwn6Lw6FWKC9zT4zSx1LSyA0`3S;LI4~+H@uf)nd#LJT_1!^-ZB4FsxIVC~f42 zO>0Jyrm`B|-&=bCA;f)lTVeDGwu2aU1It$A5HinUjixHRnk(9p2lgLLBkmh>`ik9$ zhI2#Xw%)bWw;EVbDC)edvyo|ztPk7mHxCt=mw zgDc%v6`RcWi_h_!%NoDD$D4kZmAP-vDDciJ@D5PsdbMtQSx-k##r~hht^zEot?jCa zN=rz0cXxMp3Mfi5ASs=Kh;(;%hajB>B13nFz|bX~%72C%4&V2G_o}mxIK#6zoU_l~ z@4NQ9*AnBZ&vazakk>7um#N;S8k36ohJ>fyP#F8YpuI0Wh)7~j&YC;uh}qLYbJ@~J zM00`!ohA-tIYz{xMD8f2;cYw-Iu|e3qr)y)|^j z-q5C-b!%Bx35 z9Z*{Fv5n4+k#s&G{UiA6dP4i-9oyRhG=i58eo*xe?V9w-wYew79QV&&J{832lyV4q zLao!O<&d}Bq1jY@L_FeX|4WZH5}y=5%T=j;EW7~bjD78(`j|Itd%s~`r2S>s z6B?dQC5Mn1>N-7d+reE?(gR;)Np=#jDD6QO@`owGPq$w5QXt22cE5^oqk-G75MEu) zSZn}-H~a>bS}-J7JifCx?6o=u+2x?Pffan7IU!I9=xsi!#UquK%*_KXuhx71RwGG zgXz9Qg{GAAIJ45IVUx5}G6D!A$g9jPubT&}(}+|eicy*!tILR1;$3YZ>s|ej2NG1h z>=2Vda3sH8&)J|h>kbgSN$^GJHRYn&zu`cL)zGNtu?V=s$$OfcYTp9lP&< z?hs>|GIgqBaYj%o2@}&2RZ)x%glm$@R<@*>J7rnxK`x;zS+oP?8`Z8Xtmk>5s;}bM z#j|pC5tX}?5jP4_4|4KMo2hnXUvEWu?Wdpa@5}WPiakve<@Iai_qDt0C>M#0C7>FL|;$41D{W63m)DD>1@J ze1Fexsao3_OF$%I2@#&8MnmR0vES7>tH+`B(I8)DkuSU~c0TlBB-Y{}Ig!VX5U2Ce zdeYZHu?L-lHTFZdB3Jz}i}=FN?^yG!hnR3jx2l>+tiO)X^%t&Blv-l|#S7ECS3Wo4 zHkT3@cKy49EF_2wOXRdzGe*XVc981^nJ~u2sdhX!gEvGr(|*_?pKRI#MCok0XG3aQ zXn`HrMbWV)5W&5-k{OO_(r`g=cwv;`CY-Unv1-ji(zsuyjr|9s&G4Ks1n}Z`&BX}1 z>qVKPyCH}TmYIV1dc-J95drZs65Tmj%Yy^^7LVmPODXYXvapzfq{}Ibr$jrr9MWb; 
zdB98#DW}xM+$n{)vQ&{qlyS1bS(@T>i#QdeSPR*Hs*Irc?3mz~Y)uF4YO&BJ2ys!= zg-MDasduJU%5Z7m?gpYb`uyl4fAZE6h6hqtN>BHS*4l~~{(ANw@|8?Ub^rmKAXR@S zs++PWg;q)!Cvm4G=~7gwG&^yZCGC=3O)x8}Ov!^K;CAkqjDqV+S4!79xeTKD-J}d+ zp6&;nR_a^#&6JB$!>tFKut%4xQcJA+g~<6c*C$G@(SzcdX=0QcP4*QSio#iq{~i^C zNQ^-*Lyw;C0)gH;m*EFO6HOq0;MCl7@zHLZ)~Y6|a-rYCV&nf1IVqN~Xb7ztNgEwum>xPXP!DNexLZla!(1-(@) z-}oO()BZY~c0Zpi@M}Yk|0}5^bST}$ffa;Pm&UyF9-o5eTXO=Y5`x%Z9&Ne)9y8hi}= zLt#h|^6LZyLceFeQtDecBY_L?r8{!zvW;Zb}mI{$@F4hCWD0-KH5uHE3=jSoj^buX{g~p5V{D~bU zHWpqd$7o|LD{^BbXqfc1-RT7uBO#NdwqDkm)`3*TQ|h(I9fNPILl$4 zL{G0nYH6TA(~7>u@$qLvF79;$r=nLAb_ zRF1;4Y)9T6X0`6k5cRKQq;E(_i0p}r?A^(5U6{Q5R@BNmHGMuho;p;dNGp|>O*o{6 zHEj~|`ZIf`g8|()nM$HPBZ2LO7=8pJ0>)FwyRN(mo9$zXc6(E;v&-G84JGSbl5g88 z{bM46qatAv8SFpu-p}s^p{17zq;u|>_(WPk8ng60*6JrkmbXdXYTk>3&r?Y9Ke9p5 zI7Z-b@5-K|GHy!laK;MW*37kkj25QQ^@_a$h$g#MeXRScn>e<&6r1>m!~3dJIJO~* z;{wCkJ&v?yda+5^Ci#G_3Y!`KEsh^)9BI-?%Grtqs`;3-(^ePKt#P{$k25jv*bIpI znN;g~q^$s?3H7X1!h0=}L;zBNde$J}y*_Zo&d=ZabHfKpmcx@Y>YD=58y+rf^v3?? zW*rPv$bX2fyrPu+o8~INRV6A>s(ze$%}FoWnTzW~gO4qN)CRaLimgXwKUOI{%3eC$Yw}1s zQv`KrcJ}95y+1Qz%OD;`IlMh9Nit~lnQM@!I&5ySJWDFo#~sH@n#In^k-$3ZB%mqW zL)hd|3&#wG<+u1jxN1NN2h|-^AF@WakuAgh5=TgSegN~3NJvU#fTQ}Yw%zJL;{tOQ z*XJ5lg1SHhymH^mi35vAF1Kf^7(^Co?Y#$;W*!~hb9rR_jX7tT?Ad_OVVw5?nx4ei zTE*~uW*c7zFCD5~bkGA#z9BwThhWtK$|VM@YEN5q^3P<-nA8f@?oz8a#G2pgLqBD?cDX%AQ%S?vhh}2msLiSc3H$&|h{PAo_>jO+`_pUX zl*biB#Q9qlv3Z9Aag1Mnc$q%4zoK5w6z1O_zuKH9Id3Zn3%|U3y?i3`>2B%mTek*p zET1sFzS$$7MD1S$?Z0gwkQwj*K^q0oZW6VB1@`!f-&p;)2sg~geebgKzW5-Iu~DqX zMnFGI95HM;Eu@#deVa(3QU>X8?mJZ~PNGiYQN<6Pr%CAERzHFsMfq&E09@_(RkH2p zE>DPd_Ig|Yo9+cY&e^?%0+KK9rTQY@Lx3HMtKefFpPuEpmKU04yN%)+4E3BZO1FYSy$(&pp2RRk+>HDMO-=KlPJ;BE4iJM-E6f9RO#?h;h}>rjgVfq3!)0f{k~M)-q|N4Gmtk zmVX8U?||Se^1=sQ0ev)HRSO4yPNbhyEw^GL@UNr zQ>9fONqhmaio5V{3%rJC>IJtk(-kH7Xz%CMYGLDDNeH5KUUMTC5QIrvg872ILD8{0 zcwweY@q?W|KQ7^aqP;953k80)5pZ&hT1~CC$rcc^OXEZM+{92j6&wb*od@vMh*XG_ zh!lwAh-8SQ2qXx^2t){khy;lEhV*4+emNPZL7KW_#&(HdUz6GeWDT=TV 
z2u6cqd1MI$-!S6^7kEK59u6Uegz5X@E<1X+G$Urz3rY6dIs^|e?P`2k(JkW#cyDXElqFkh30 zclHW&G8_{-;N@cJd!$srju8(O4*HcOvIP&4+fT^H)(KvFf zs)oXUlC=ye09ku*pH`|nUsscx>#1{xlf&y7%C=~I2+v}IL0QdVm#-!q-B7T72=QL;`>~BwQ%7HV@n3W;l%&-> z7RucC^I3!kNXCTpy;s*8%cwjTin0IwStO!;q4`Q;+ zk{qknP1LzV1VI@*=~4IGlno-`_M=Y?o>_67r$foUAb{*khmw81X?Xs`6G>ug2wVyg zF?cjW>>A(bhMhq#qBtOod@{9Qw?%!kG0P;c#4$d9VZ}{Cae%W$&FWX#NLBIjR$u*V z8HqGlaMOBxf~@I!$qK1`7f;u!e@)}-hR$rUj#II4Z=M_H+ZC1BA|9vwR+IA1m~ylw z?gohs2=gSxN#HXIU8}HIyWTQVn8PJi-J%BgnZvJCM=IUP3-{M}v}Z**Z3~dHg#Y#W zo2CUg+o1tYtZ-L4;00Y)5c&1!VzdQRjp6eLkr{~b{yMDRcl}Zj2@E&c}?Km7!8q#9-X5Qi{-zef*yu7wUGTmI~m- z9FeQ=+y3!r-SfsBcsW zW?L3!d&zDN)SD1qM1pRcB}7=&irkx@{H-aq$b>8~v6&RHjI0npaFXT$}J1 zN6BxAJ=Z~wZb2m|O$bvUU@>Ou1;*ynQ%GS~O}lsDI}vha`jjpiABc9$28{<&jE}qX z)q2qn5O8vvb^SgaG;{9GJ|`wcG*@o`J`OqU$hct3BlUQG2`iVql#xNG3%OOIB0%@T zpHJ9YjioziwgpoliprUHn76oKlFL+9SKU2McR8nRVYQJD(#QsBq=Ph)K^pNOjpz`5 zxCU&SaunY2PrWr3FPSEwWUX1sW!t&IXgSBG=cf;koC62Qvt&uFt3}8 zr4`89_L-X#$njr;{mFh05VR@vc!0{HyOm?aa05;#XFk6-{;ZTaR*Sxwm03xk-fep& z!XyPMp<^g{9CH7AAflgUyQ}c11W4)_V!PiLcZi)@RL{1<>6USFF{j((nUrK;{*__H zuV_6;o{v@u*pMS8Okxt?gybb!Hz?zX<|S>*#-sMg7-u(!Wev{^6%e00oC7l&r61S$ z&haI|Ykp?TlCeM%l4Po0H!06F9g$|(DMWP-ZOzfD4jUUlJG{G{>C_{2I0Wv=!BNf+ z$4{hp-!ZZ5B=$hCe$JdDVAHL##XJ69?Src%6X|D0ULwRo3sKh0qb%$7{uZE=EkwO1wBEWt9(*#X3O4z!e(nrtYyE*p7osPCQ{*3Y{VS#b?`}l zrgu}w9@mRF)*hPobOR zssW;Ye4s?*S25hetD9W|8ugpF16uA@7KYCpK}M#I|6ADhq}u}oZChO)pfGJQ+i7j& zR-82-Qiu3Rk8mkpB6qxZb9Jlok&pDF$#iMAVwUn4xLx#TTtQPdWdsEv7NL;?y11IA zD3S%AcWv(?Vrjd5-=YMHM0t*Qke@}d`1qW4ugheOTPE8!d4Jwc9?)aCzqvK(?ncRr zC5U|~259$0__ZOXKM03cfv53nLr6zRM{33Lq&n66j&?1!Q{cCIm*uXxUxJ^GAIV)G z3_=89%uA{7E#FT3`r)5F_{n_w{@e4G9tAR-{>z|35``yyQm3VN7?Bs7t)qlzDJClT}K zp5FX`v4mG(q;?WnGY)|z!J=vX?Is?M+sEc?CbGDK+|B;MO6tt3^{UH%kJSJYutRD#)i zl}Oj6phLAx3bA#oab#xf=WC(0uy+KtJ!~4OhJ5FXNgd59Hda&7BI*(zO;AkQND-1^ zBI>YoV_MbzDIv6%ZI(1Hp15CEsiJ-0rsvhXOobkWlOJ{$_YK4qnJ6K3AOypb@+r?< z_sU(dkQRdtRl7uYEMAbTFV1JLIA7t%E9QiJuKZpZz2C#n&}Et@)}%^_j0)161WTm& 
zBW=?YmW*V0k%*{`9~EEr%R5QDI=7*nmx~L7Xdr1(O-IX2GkqAb~}deP7ogdz+RV0Z80#uT+1Qkod>7=*%x3wXHOm86_n9mY)PgH}n`(?&o9v?`vpa^ja-qPZ< zu4cFFLzP2v;>)xhxC-+gc3Nwl{@`e5>~IHDgXYs>$B6o{&IElWNmArtuF*5qd$r1# z0i;u-l^**GF+N$W*`9QVf@7-r_+w>V8KaH#3>9zoy=kZRq?n>Gm43RW`OIim3hu>d z=kS+I9jG4&VL52_x)*=;C?$KsFiTzImD_!mVm{3UQ98ETB3LI(2|`{%w%79*Gx(I_ z5vRcN#js!IHr)FzFM9eejp-oQ7x<C1UR^U0S^!vOlBs?pey1UGQ(1rLLHl><^&R zYgwiKz5^BA&COY<8QkW)ZhqI;sSm#2z* z7p!y>$IO-r$Mz}Gf@OC~UAJYU-Sa?=v2Jw}8^Kju6h-RGf@2$Dj`GZi7~2%xXnZ`} z>L;Wq)^9n?zABr1eOoSdA~bHWoan-Rg3+!#Y^{Ho3G!=K=4VrPVF@A>FqsV!pAH$m zw_jiPvPY>Fx7SxGPJUJVlK=kO4OSg?=2~#v=m_T1r?iF@wM4-=97tUKA4pu{@~}=B z1=71Ca+CHc0&z*IqA_e9jGd-gn2Qc?pQVnLx1BG1`Sx@x*t<1pd&}c%9meu~5b9l&w_}g$sUDD-$+Hxeu&KuCl%?PI4pg6m*K=D0TIsEbpZGTuVMDxz~;*0X4wSZfs5-_>Z9v zx`6pFBiTMf!SBUHA^n%#KbH+8TkO zZew7o{CxWIi!;}ALiTk0Q=%ojrIULBl{KEe2K(k~K?giNcvhN26)PG&q0tQ!j$5K{8~bWLbysRzmtp9U$~52FvN1zG zqz)nz6aN+!biAt6?etvXehS`_tXE0(eq0-$R*}4Kd2nop#)W)Aam$(P)qt@qo5IQN zHod)UdpsRu2s+mr!klox7DR!$hTANpNZCJ_ZIp$nw?y7wwYgxltD(9%SMl}q{#ute zu%uW=zpb!wOZ{zjxULY6Q|HCVGxj{xiEXguGAnkszHr?!Nmr0gfT^KBYo0Ad#>{uV z5|O$6XjwWIMFIp?#JDAN$0y7F2C;?GLrxNzI2Xj#6PtPAh~XM4@XGkbA3_#d`_x1p z=U)$eG{Q*>M8g!}7L|h`vH~5-)LuB!ue^SaNO6hp1RBx7nK6o|9qZ=pxbnEbS>^B} z$QTuaJ-!k~m~H|ZZxrzr?cQ|zxs3HWSMFIpz_`cEL6~l>(;qg}$bnXOr_^e{89zxt z#q+6+(5>5L=@Ib-xj114`0f^tw>pQd9a##g-{!*ehmY{AU3AC|JB>)x`4)Za=t6KT zPY{jjy~9{J*}pw;NL?d%Mvb`JJ1^vgd`X0A3VuFek@)eMW2FuVW%?8 z(|=r)ql6lk-ECso`#8WpfVT{8lQhe|g}MIa;=9$?b)pqW&*CJ9BAv;Thi*6S;UYhG z_bK~`TbaLDzmY}~it>8GVEUs!*^jc_V zu9HU@vchoxG4)HwE_U)CANYFwLkPNYYk8Vkx$ZqT<8-CYLE}q{t3csRcnR*~LaSLe zK+}9-uBf~%h&{xP_olnb88vF=Yb*uy~UXOYk zY8v;0(j1LUw!|>_oMjs^-_+t1pycj;NHvXVInQ&=SLaQ5vSRiH<%=O%Ufm2!-i)eD zRb%Xa>aIYn%qli+pBmE+zlzqkm@QXkp=dMt2vTxSMfqaZA96V^UrReWF)44Jc%fE4V%d!)q!V;;0+w?mVFX&ZH@jlrLcet~e8KlaNLd)Lj1>;AQ>7m6Ix7c!nMGnPW;my&NLUjxhJva3? 
zGfZ}q6!Y6^gs?RcC%eEiBa%f=n3ts;ky>Qx2JZtIfcUN*&j4~C%bZNcAjb@2V+c3E z+}kW_->)7>UcgpG85dQsz+}qicDJc#0LcDKnc7ioAOXmtSrgEc78%f}4%4>78D#kU zREh;fM@DG|+1;=ne}CFQoN30?@vHY4ddtVBGPd>yYg{|{mdK}m#F-Ua%vDX26>QIs z(DVYZ?Ya-g*2*FzJFDJmY#E1I@XE~TaF$*e(8#C%NTrtcm8`>KGo?$q#Nt|JTCtW- zxf60b$Syv*Lh|uJi?koc{g94&o+ZLA^%YigIrW7HtGr!q;cQ~`tvt%bi45vod5ot~ zSX$Cey|G1Ixu96Np}U0r4!)R*mGALvTq}nw=X@1Y6{FV4@$!RCUz&IkeM1I&dr*zp z^2j--REp7G%EQyQX?#(i@;~6SzpeRi$#Uod_g@ca)Ij?Wy8~4zjMbG2IWAcXNYq1Y46POLeb#RZi+x|d zHC1vMW6mQjB9Ybl05U=SZeSLZ3}GStV#K24S&x&X^xH&7mZz(yFLTB>>U=CXbuP1Q z&%8a%p3FEd?Z-pa$fp4{aw0&DJS?P*^$#_2q*)22AHs*XuGPr1uGPpRQ~@<|Hb9MB z1*%4_l34(#kqaCmyN5&7$nUuS>39IQ#&n44j^?HMmm0a1nNcn31#gUp%f}|c*Tt_( z`M;OHDG5(n`CQ7cIlphC_SWr5z5QI1R%nv;cvQ)q_m0+pl{ZY85gsd~*EQ#P>w@=`Qmk-!A`LnQPq>gnJ_iTuZmx6}<*s{!}CHS=XTP ztVkGC+Mc4Wz72nJBHQLWuR5+g_sZMP+rKu`dQE2yXU)76C@mLlz4KH`dR1Zj(uucq zauMItwnN}qnHa%9{(&_YvcyrARU>t=QmI7~_t@bRv;*Rm7E7clX9*rjufWNN?@@(O z{1ML!in*09UrfCJ(rdp*r`KTZJi8Sz89$jnY3lLqeJ^topiXY-`~~WF@Z#Fk8nby{+dV}zMy-N-1!8xzs5)dG7HYLnZ|XmMwZ1^9 z)aDtnjt=!ZK!~=C(FspP1pE#VhBflN^34_d&sM+{r+({H!wmW3infWUiF52N#_s)x zhSB*VmxGj9f=l*y3~`E{vJU4kxbwf~tczAk0Q?TV0)7X{WTpKy@3}Ttd$ordwU+4K zvIeiFyzzUa&uKYwa_(gD-5>)rXoI@=&?=0#Jy2u7C1El1V8;*jrG4N*IqK+Iw(fkX zajg+HS|@H5#Hr<7WS6_ePZC;jRsYNfV(5P4ccmT#Kg3G9cl{3;?ECq@KEClj^!xwk z8&M!A@YT0XNjHQJ1y%#`MJmNT481?p$XT*hsCsA=tU~|vJ8(tdg1!}*pn8B|X>i94 z0Yb&N|CbthIjREqMa*0H_YHs=ITm~lEw0o@T108JLOR5t>~~Mb!~ResN65oSDQrKc ztF)KDYj*W1pz?0C5J^)^oG zx^$#{$GrStsx)uc0?%WC#@~B|rJQkoz4MNDT@mjQF#F8j*b9R8AHxn*0@VcMYzl}b zI$9V4HY@*2gJJHo2MF5Ue((T2jpXL;txk1Hg~ikg`!hoHZ{A0yj?r7 z6WLiL3x8YcA-s=dPV9?1g(8&kXjj{Fp2KI~pMAeb1BETT%SdwL*xl|qX*n@J6ksLD z;_k;N(^PttU$(BAuivgPaq)`7E%aoo7*CFXnX5)>+vx5XmPib`1k>kiuaxdJR*j{ne?~z)FM8*+qsZg<6ynqIyvP|Lp%i@w z-j&W|{loP;5roDoPu;4yS`M!!GVsFdMJlXytpJYJ|_Sr5-iAzM^7Ct z&2*kH{zL{g2e5nZL?>iRI5?}LN(6ACJh7UMS&sgG|EFR%j%V2p}9UU3MNnbWwX|8M+X>S#x3D&!nAthWpkIEZ~xmT8)-Afim)A z3SV9b5uO2)O{Ef~jr=~lI$=i*rOb4KTt$=V9aqpNORi9Brc5R^6b&_Aypp|6?vebs 
zVxkDehIH3Q^sJWYnx+(nuLg*a2vV#^8!}&XX%m;==F_JK2RbW#vD^EKSY`S2>y(J zwESw|c>xq_G}Hk3{%gxASA-Eu0{Acj#jUW(_)l|L^q``9<)W1%6f(E_u}55QskmkX zezr>cDo{ULuC+KY&wNj5Tq#&@^W?Zs^a)WTMg~Ke*04MklM@KrOZ4R$g`Q-_;63O2 zN=Alz#OHqL#u^s4t+P+Ud!LuwFU@iZpOr#+fiNqhZvpQm!$Q~;PMpc@zLrxvNO1@! zBrP(Cd59z=C9sCa?Q0<54AAwCL?oFl6Aigt7??qI`fc_S?-dCF(o(ZLD z#}7Kyp{28uDblh)VAB; zsA*$vjLt+Omkd#t0&)*tJ)EY`j`eeDV9~&5c=v`*IcED{bY%^yE|NY>syzQ>hJET- z$&s~bE2(n)oN{65^@6^n#O_qn(AlyV9D%d7tK3)zSePA{!J#o|I0{wkec@ME7rO$N z(>q(I*Cag8P7R$65OZPNh`Br*Zr)C4%w=rhWCi$T{_Fjms`3Cq8&j4CC@y*uxSxM1 z5V<0>PjOokAyOZ9jOckN>y@Zms+UVvj^Gihe)Lse)fHosT24=;zS;rwhh|C!+uL0Y z_seX{L}(^GOji%WFZ#K>wA1`1CBh^Du9Pj{kE3oRLL;ZPa;t#16NJhk#lOHK zf8RSRn9llRL~ar5oOM1h^eZhXa)`xI==QO%kC&e%=XUg&RquRJ{_1|89fAEe$89__ z-gZ3oAIFx>^ES~N+)J-DPva0{eF~~nK1=xgkh^jpy5e_nU88{p-GO{jav4AefVxKh zqS_z-{~f>?P&fG34v+}kd}w=~4K26ozU~04l+B+{{xGNx?2u4eYbUfi;?(4fjgF1d zV${O=sGzndUW5-STs5YhIw~tdJcqKIi4x zhHIrJ{mKPr#Mt1SDN(dR?DJPmA(p%Xp8laMq1OG{{aGw0rL&vLk*SfvahkBy z;az}P%M1pXA1qDs!=G?LP^LkYK0WJs?qa~`Jvv0fCGxKgl4fnAyG*%jy^XPTek4e( z-R>o)mEzRt`4%RNjidZ{!Ks1G(!F@5thw!S%pC2w1@)gmkpM+rhSury2(`2NaG^6tNXzM0UVTu*{ zv0fUC%iaG2L=EZ>c9XC3l)EdRbhT~1J;K8)^AF|1JF<@cvxttj5Dm3D)tTNE9zUnE9wkj zLj|2-v7u_Lu)2shPdkK)!KVZwmvL;GYCe07R&`2`nO1jsbQ*-_47V zAO^jtzah>5AVY;2V3DEw0by&+Z$7+s3IOsy><$C~p*sU%0ih-8b}*&NH|HB>ty{le zunwR@i`8M#q2SYm27nAL-GD`gu7|>|8r-~rY!IV+01RDrfIU+veGSVn-Mj{jwgB+&kl6q-l+1=jhB8#J+4#)~Bj+W6 z{I7f!01M@+U}2%DJZx5Y^GXal1F*l5*aN`O#2ywHnpDDO8#k{+rYkgj{EZY7K!&E5 ru*lGC1vY5CiJawrjr{Mq3xEvGTT~U`??HoEQsD0a5OP&}LjU)F9> diff --git a/test/data/script_samples/tibetan.csv b/test/data/script_samples/tibetan.csv index 286e181..aefebd6 100644 --- a/test/data/script_samples/tibetan.csv +++ b/test/data/script_samples/tibetan.csv @@ -1,28 +1,7 @@ -"tibetan","བྱང་ཕྱོགས་བསྟན་འགྲོའི་སྐྱབས་མགོན་ཐམས་ཅད་མཁྱེན་པ་ཁལ་ཁ་ཨེར་ཏེ་ནེ་ཁུ་ཐག་ཐུ་བློ་བཟང་བསྟན་འཛིན་རྒྱལ་མཚན་གྱིའི་གསུང་འབུམ།","Byang phyogs bstan ‘gro’i skyabs mgon thams-cad-mkhyen-pa khal-kha er-te-ne khu-thag-thu 
blo-bzang-bstan-‘dzin-rgyal-mtshan gyi’i gsung ʼbum",,"{""capitalize"": ""first""}", -"tibetan","རྗེ་བཙུན་དམ་པ་སྐུ་ཕྲེང་བརྒྱད་པའི་གསུང་འབུམ","Rje-btsun-dam-pa sku phreng brgyad paʼi gsung ʼbum",,"{""capitalize"": ""first""}", -"tibetan","རྗེ་བཙུན་ཐམས་ཅད་མཁྱེན་པ་དགེ་འདུན་རྒྱ་མཚོའི་གསུང་འབུམ་བཞུགས་སོ་","Rje-btsun Thams-cad-mkhyen-pa dge-ʼdun-rgya-mtshoʼi gsung ʼbum bzhugs so",,"{""capitalize"": ""first""}", -"tibetan","སྒྲུབ་ཐབས་འདོད་འཇོའི་བུམ་བཟང་གི་བརྒྱུད་པའི་རིམ་པ་ཕྱོགས་གཅིག་ཏུ་བསྡེབས་པ་བཞུགས་སོ།","Sgrub thabs ʼdod ʼjoʼi bum bzang gi brgyud paʼi rim pa phyogs gcig tu bsdebs pa bzhugs so",,"{""capitalize"": ""first""}", -"tibetan","བཀའ་གདམས་ཀྱི་སྐྱེས་བུ་དམ་པ་རྣམས་ཀྱི་གསུང་བགྲོས་ཐོར་བུ་རྣམས་བཞུགས་སོ།","Bkaʼ gdams kyi skyes bu dam pa rnams kyi gsung bgros thor bu rnams bzhugs so",,"{""capitalize"": ""first""}", -"tibetan","སྤྱི་སྨན་ཞབས་ཞུ་དང་བློ་ཡིད་ཚིམས་པའི་མི་ཚེའི་བྱུང་བ","spyi sman zhabs zhu dang blo yid tshims paʼi mi tsheʼi byung ba ",,, -"tibetan","རྒྱལ་དབང་སྐུ་ཕྲེང་རིམ་བྱོན་གྱི་མཛད་རྣམ་","rgyal dbang sku phreng rim byon gyi mdzad rnam",,, -"tibetan","དག་ཡིག་ངག་སྒྲོན་རྩ་འགྲེལ་","dag yig ngag sgron rtsa ʼgrel ",,, -"tibetan","གྲུབ་མཐའི་རྣམ་བཤད་རང་གཞན་གྲུབ་མཐའ་ཀུན་དང་ཟབ་དོན་མཆོག་ཏུ་གསལ་བ་ཀུན་བཟང་ཞིང་གི་ཉི་མ་ལུང་རིགས་རྒྱ་མཚོ་སྐྱེ་དགུའི་རེ་བ་ཀུན་སྐོང་","grub mthaʼi rnam bshad rang gzhan grub mthaʼ kun dang zab don mchog tu gsal ba kun bzang zhing gi nyi ma lung rigs rgya mtsho skye dguʼi re ba kun skong ",,, -"tibetan","རྗེ་བཙུན་དམ་པ་སྐུ་ཕྲེང་ལྔ་པ་དང་བདུན་པའི་གསུང་ཐོར་བུ་བཞུགས་སོ","rje btsun dam pa sku phreng lnga pa dang bdun paʼi gsung thor bu bzhugs so",,, -"tibetan","བདེར་གཤེགས་བདུན་གྱི་མཆོད་པའི་ཆོག་བསྒྲིགས་ཡིད་བཞིན་དབང་རྒྱལ་ཞེས་བྱ་བ་བཞུགས་སོ","bder gshegs bdun gyi mchod paʼi chog bsgrigs yid bzhin dbang rgyal zhes bya ba bzhugs so",,, -"tibetan","Tshe dbang rgya gar maʼi khrid dang sgrub pa bcas kyi gsung pod","ཚེ་དབང་རྒྱ་གར་མའི་ཁྲིད་དང་སྒྲུབ་པ་བཅས་ཀྱི་གསུང་པོད།",,, -"tibetan","Tshe dbaṅ rgya gar maʼi khrid daṅ sgrub pa bcas kyi gsuṅ 
pod","ཚེ་དབང་རྒྱ་གར་མའི་ཁྲིད་དང་སྒྲུབ་པ་བཅས་ཀྱི་གསུང་པོད།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Bde gshegs kun ʼdus kyi gtum moʼi khrid yig zab gnad gsal sgron zhes bya ba bzhugs so / $c [Smin-gling Lo-chen Dharma-śrī].","བདེ་གཤེགས་ཀུན་འདུས་ཀྱི་གཏུམ་མོའི་ཁྲིད་ཡིག་ཟབ་གནད་གསལ་སྒྲོན་ཞེས་བྱ་བ་བཞུགས་སོ། / $c སྨིན་གླིང་ལོ་ཆེན་དྷརྨ་ཤྲི།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Bde gśegs kun ʼdus kyi gtum moʼi khrid yig zab gnad gsal sgron źes bya ba bźugs so","བདེ་གཤེགས་ཀུན་འདུས་ཀྱི་གཏུམ་མོའི་ཁྲིད་ཡིག་ཟབ་གནད་གསལ་སྒྲོན་ཞེས་བྱ་བ་བཞུགས་སོ།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Bod kyi la gzhas gzhon nu ʼgugs paʼi lcags kyu / $c Kun-dgaʼ-rgyal-mtshan gyis bsdu sgrig byas","བོད་ཀྱི་ལ་གཞས་གཞོན་ནུ་འགུགས་པའི་ལྕགས་ཀྱུ། / ཀུན་དགའ་རྒྱལ་མཚན་གྱིས་བསྡུ་སྒྲིག་བྱས།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Bod kyi la gźas gźon nu ʼgugs paʼi lcags kyu","བོད་ཀྱི་ལ་གཞས་གཞོན་ནུ་འགུགས་པའི་ལྕགས་ཀྱུ།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Lhug rtsom pad maʼi zeʼu ʼbru","ལྷུག་རྩོམ་པད་མའི་ཟེའུ་འབྲུ།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Sbrang char lo ʾkhor 20 dpe tshogs, 1981-2001","སྦྲང་ཆར་ལོ་འཁོར་༢༠་དཔེ་ཚོགས། ༡༩༨༡-༢༠༠༡","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","""Sbrang char"" rtsom sgrig khang gis bsgrigs","སྦྲང་ཆར་རྩོམ་སྒྲིག་ཁང་གིས་བསྒྲིགས།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Dpar thengs 1","དཔར་ཐེངས། ༡","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Zi-ling","ཟི་ལིང་།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Mtsho-sngon mi rigs dpe skrun khang","མཚོ་སྔོན་མི་རིགས་དཔེ་སྐྲུན་ཁང་།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Rgyud don rig-ʼdzin dgyes paʼi zhal lung","རྒྱུད་དོན་རིག་འཛིན་དགྱེས་པའི་ཞལ་ལུང་།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Rgyud don rig-ʼdzin dgyes paʾi źal luṅ 
","རྒྱུད་དོན་རིག་འཛིན་དགྱེས་པའི་ཞལ་ལུང་།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","Gnas brtan chen po bcu drug gi mchod pa rgyal bstan mdzad med nor bu zhes bya ba bzhugs so","གནས་བརྟན་ཆེན་པོ་བཅུ་དྲུག་གི་མཆོད་པ་རྒྱལ་བསྟན་མཛད་མེད་ནོར་བུ་ཞེས་བྱ་བ་བཞུགས་སོ།","r2s","{""capitalize"": ""first""}","From Lobsang" -"tibetan","སྐྱབས་འགྲོ་ཡན་ལག་དྲུག་པ་བཤད་པའི་བསྐྱུད་བྱང་ཟིན་མ་ཊཱིཀ་བཞུགས་སོ།","skyabs ’gro yan lag drug pa bshad pa’i bskyud byang zin ma ṭīka bzhugs so","s2r",,"From Lobsang" -"tibetan","བོད་ཀྱི་ཆོས་འབྱུང་དང་རྒྱལ་རབས་ཀྱི་སྐོར: སྦ་བཞེད་དེབ་ཐེར་དམར་པོ: དེབ་ཐེར་དཀར་པོ: གངས༌ཅན་བོད་ཀྱི་རྒྱལ་བསྟན་ཕྱི་མོ་སྔ་འགྱུར་རྙིང་མའི་བྱུང་བ་མདོ་ཙམ་བརྗོད་པ་པདྨ་དམ་རྭ་གའི་དོ་ཤལ་གཞོན་ནུ་དགྱེས་པའི་མགུལ་རྒྱན / སྦ༌གསལ༌སྣང, ་ཚལ༌པ་ཀུན༌དགའ༌རྡོ༌རྗེ, ་དགེ༌འདུན༌ཆོས༌འཕེལ, ་ཁམས༌སྤྲུལ་བསོད༌ནམས༌དོན༌གྲུབ / པའི་བེའུ་མིའི་དགེའི་ཤུའི་མོའི་ཐུའུ་གསོའི་པའོ","Bod kyi chos ʾbyung dang rgyal rabs kyi skor : Sba bzhed Deb ther dmar po : Deb ther dkar po : Gangs-can Bod kyi rgyal bstan phyi mo snga ʾgyur rnying maʾi byung ba mdo tsam brjod pa Padma dma rwa gaʾi do shal gzhon nu dgyes paʾi mgul rgyan / Sba-gsal-snang, Tshal-pa Kun-dgaʾ-rdo-rje, Dge-ʾdun-chos-ʾphel, Khams-sprul Bsod-nams-don-grub / paʼi beʼu miʼi dgeʼi shuʼi moʼi Thuʼu gsoʼi paʼo",,, +tibetan,ཀ༌ཁ༌ག༌ང༌ང༌ཆ༌ཇ༌ཉ༌ཉ༌ཏ༌ཐ༌ད༌ན༌པ༌ཕ༌བ༌མ༌ཙ༌ཚ༌ཛ༌ཝ༌ཞ༌ཞ༌ཟ༌འ༌ཡ༌ར༌ལ༌ཤ༌ཤ༌ས༌ཧ༌ཊ༌ཋ༌ཌ༌ཎ༌ཥ༌གྷ༌དྷ༌བྷ༌ཛྷ༌ཌྷ༌ཀཾ༌ཀྃ༌྅༌ཨ༌ཨི༌ཱི༌ཨུ༌ཨཱུ༌ཨེ༌ཨོ༌ཨཱ༌ཨཻ༌ཨཽ༌ལླྀ༌ལླཱྀ༌རྲྀ༌རྲཱྀ,ka kha ga ṅa nga cha ja ña nya ta tha da na pa pha ba ma tsa tsha dza wa źa zha za ʼa ya ra la śa sha sa ha ṭa ṭha ḍa ṇa ṣa gha dha bha dzha ḍha kaṃ kam̐ ` a i ī u ū e o ā ai au l̥ l̥̄ r̥ r̥̄ +tibetan,གསོ༌༌གནུབས༌གནོན༌དགོན༌དཔེ༌དངོས༌བསྟེན༌བསྒྲིགས༌བརྒྱུད༌མཇུག༌མདུན༌མགྲོན༌འཕགས༌འཕྲོད༌འཛིན༌འགྱུར༌འགྲོས༌འབྱོར༌འཇིགས,gso gnubs gnon dgon dpe dngos bsten bsgrigs brgyud mjug mdun mgron ʼphags ʼphrod ʼdzin ʼgyur ʼgros ʼbyor ʼjigs +tibetan,བོད༌ཀྱི༌ཆོས༌འབྱུང༌དང༌རྒྱལ༌རབས༌ཀྱི༌སྐོར༌:༌སྦ༌བཞེད༌དེབ༌ཐེར༌དམར༌པོ༌:༌དེབ༌ཐེར༌དཀར༌པོ༌:༌གངས་ཅན༌བོད༌ཀྱི༌རྒྱལ༌བསྟན༌ཕྱི༌མོ,Bod kyi chos ʾbyung dang rgyal rabs kyi skor : Sba bzhed 
Deb ther dmar po : Deb ther dkar po : Gangs-can Bod kyi rgyal bstan phyi mo +tibetan,"སྔ༌འགྱུར༌རྙིང༌མའི༌བྱུང༌བ༌མདོ༌ཙམ༌བརྗོད༌པ༌པདྨ༌དམ༌རྭ༌གའི༌དོ༌ཤལ༌གཞོན༌ནུ༌དགྱེས༌པའི༌མགུལ༌རྒྱན༌/༌སྦ་གསལ་སྣང,","snga ʾgyur rnying maʾi byung ba mdo tsam brjod pa Padma dma rwa gaʾi do shal gzhon nu dgyes paʾi mgul rgyan / Sba-gsal-snang, " +tibetan,"ཚལ་པ༌ཀུན་དགའ་རྡོ་རྗེ,༌དགེ་འདུན་ཆོས་འཕེལ,༌ཁམས་སྤྲུལ༌བསོད་ནམས་དོན་གྲུབ༌/","Tshal-pa Kun-dgaʾ-rdo-rje, Dge-ʾdun-chos-ʾphel, Khams-sprul Bsod-nams-don-grub / " +tibetan,པའི༌བེའུ༌མིའི༌དགེའི༌ཤུའི༌མོའི༌ཐུའུ༌གསོའི༌པའོ,paʼi beʼu miʼi dgeʼi shuʼi moʼi Thuʼu gsoʼi paʼo +tibetan,འོས༌འོད༌མོའི༌སྐུའི༌བེའི༌འབུམ༌མིའི༌དགེའི༌ཐུའུ༌ཀན༌ཤཱ་ཀྱའི༌ལི༌ཁྲིའི༌ལས༌གཞིའི༌མཐོང༌བའི,ʼos ʼod moʼi skuʼi beʼi ʼbum miʼi dgeʼi thuʼu kan shā-kyaʼi li khriʼi las gzhiʼi mthong baʼi diff --git a/test/data/script_samples/ukrainian.csv b/test/data/script_samples/ukrainian.csv new file mode 100644 index 0000000..1d47212 --- /dev/null +++ b/test/data/script_samples/ukrainian.csv @@ -0,0 +1,2 @@ +ukrainian,А а Б б В в Г г Ґ ґ Д д Е е Є є Ж ж З з И и І і Ї ї Й й К к Л л М м Н н О о П п Р р ,A a B b V v H h G g D d E e I͡E i͡e Z͡H z͡h Z z Y y I i Ï ï Ĭ ĭ K k L l M m N n O o P p R r +ukrainian,С с Т т У у Ф ф Х Х х Ц ц Ч Ч ч Ш Ш ш Щ Щ щ ь ъ Ю Ю ю Я Я я зга Зге Тса тсе,S s T t U u F f KH Kh kh T͡S t͡s CH Ch ch SH Sh sh SHCH Shch shch ʹ ʺ I͡U I͡u i͡u I͡A I͡a i͡a zha Zhe Tsa tse diff --git a/test/integration.py b/test/integration.py index 00ce550..331d67f 100644 --- a/test/integration.py +++ b/test/integration.py @@ -27,7 +27,7 @@ def test_sample(dset, report=True): dset_fpath = path.join(TEST_DATA_DIR, "script_samples", dset + ".csv") log_fpath = path.join(TEST_DATA_DIR, "log", f"test_{dset}.log") - with open(dset_fpath, newline="") as fh: + with open(dset_fpath, newline="", encoding="utf-8-sig") as fh: csv = reader(fh) i = 1 for row in csv: From 21e05ef2af68e9934c4112284c01b0c390152fb8 Mon Sep 17 00:00:00 2001 From: tventimi Date: Mon, 2 Feb 2026 14:02:36 -0500 Subject: [PATCH 2/2] Update 
_chinese_base.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporates CC-CEDICT update from 1/31/26 (https://www.mdbg.net/chinese/dictionary?page=cedict). This removes a number of terms that contained an extraneous hyphen (e.g. 中时 -> "zhong shi" rather than "zhong - shi"). --- scriptshifter/tables/data/_chinese_base.yml | 184 ++++++++++++++------ 1 file changed, 135 insertions(+), 49 deletions(-) diff --git a/scriptshifter/tables/data/_chinese_base.yml b/scriptshifter/tables/data/_chinese_base.yml index 0146458..28fa7f9 100644 --- a/scriptshifter/tables/data/_chinese_base.yml +++ b/scriptshifter/tables/data/_chinese_base.yml @@ -1,6 +1,6 @@ # This file is derived and kept in sync with Princeton's OCLC Connexion Pinyin # converter (https://github.com/pulibrary/oclcpinyin/). -# Incorporates Unihan update from 8/18/25 (https://unicode.org/charts/unihan.html) +# Incorporates CC-CEDICT update from 1/31/26 (https://www.mdbg.net/chinese/dictionary?page=cedict) general: # Section names and other keywords are all snake_cased. 
name: Chinese base (from Princeton) @@ -862,9 +862,14 @@ script_to_roman: "\u5352\u6B72": "zu sui " "\u5352\u5C81": "zu sui " "\u963B\u585E": "zu se " + "\u963B\u6A48": "zu nao " + "\u963B\u6861": "zu nao " + "\u963B\u5687": "zu he " + "\u963B\u5413": "zu he " "\u8D70\u5352": "zou zu " "\u594F\u6A02": "zou yue " "\u594F\u4E50": "zou yue " + "\U00031336\U00031339": "zou yu " "\u9F71\u9F75": "zou yu " "\u6A05\u967D": "zong yang " "\u679E\u9633": "zong yang " @@ -888,8 +893,6 @@ script_to_roman: "\u7740\u88C5": "zhuo zhuang " "\u8457\u91CD": "zhuo zhong " "\u7740\u91CD": "zhuo zhong " - "\u65AB\u7572": "zhuo yu " - "\u65AB\u756C": "zhuo yu " "\u8457\u8863": "zhuo yi " "\u8457\u610F": "zhuo yi " "\u7740\u8863": "zhuo yi " @@ -954,6 +957,7 @@ script_to_roman: "\u4E13\u957F": "zhuan chang " "\u8F6C\u6587": "zhuai wen " "\u8F49\u6587": "zhuai wen " + "\u62FD\u59D0": "zhuai jie " "\u62FD\u6B65": "zhuai bu " "\u722A\u5B50": "zhua zi " "\u722A\u6A5F": "zhua ji " @@ -995,11 +999,6 @@ script_to_roman: "\u91CD\u9EDE": "zhong dian " "\u91CD\u70B9": "zhong dian " "\u4E2D\u7684": "zhong de " - "\u4E2D\u610F": "zhong - yi " - "\u4E2D\u6642": "zhong - shi " - "\u4E2D\u65F6": "zhong - shi " - "\u4E2D\u6B50": "zhong - ou " - "\u4E2D\u6B27": "zhong - ou " "\u667A\u5229": "Zhili " "\u6267\u7740": "zhi zhuo " "\u57F7\u8457": "zhi zhuo " @@ -1099,7 +1098,6 @@ script_to_roman: "\u5F35\u89D2": "zhang jue " "\u5F20\u89D2": "zhang jue " "\u8F97\u8F6C": "zhan zhuan " - "\u8F97\u8F67": "zhan ya " "\u98A4\u58F0": "zhan sheng " "\u986B\u8072": "zhan sheng " "\u98A4\u6817": "zhan li " @@ -1135,6 +1133,7 @@ script_to_roman: "\u8ECB\u8F25": "zha gun " "\u8F67\u94A2": "zha gang " "\u8ECB\u92FC": "zha gang " + "\u69A8\u4E7E": "zha gan " "\u66FE\u7956": "zeng zu " "\u66FE\u5B50": "zeng zi " "\u618E\u60E1": "zeng wu " @@ -1172,7 +1171,6 @@ script_to_roman: "\u96DC\u6C93": "za ta " "\u6742\u6C93": "za ta " "\u624E\u67D3": "za ran " - "\u624E\u56CA": "za nang " "\u54B1\u5BB6": "za jia " "\u7838\u592F": 
"za hang " "\u624E\u5E36": "za dai " @@ -1243,8 +1241,6 @@ script_to_roman: "\u9B63\u9C7C": "yu yu " "\u9B63\u9B5A": "yu yu " "\u5581\u5581": "yu yu " - "\u7572\u4E61": "yu xiang " - "\u756C\u9109": "yu xiang " "\u851A\u7E23": "yu xian " "\u851A\u53BF": "yu xian " "\u9B31\u585E": "yu se " @@ -1258,6 +1254,7 @@ script_to_roman: "\u611A\u6C13": "yu meng " "\u5C09\u7281": "yu li " "\u745C\u4F3D": "yu jia " + "\u9B5A\u4E7E": "yu gan " "\u5C09\u9072": "yu chi " "\u5C09\u8FDF": "yu chi " "\u9884\u535C": "yu bu " @@ -1282,7 +1279,6 @@ script_to_roman: "\u60A0\u9577": "you chang " "\u90F5\u5DEE": "you chai " "\u90AE\u5DEE": "you chai " - "\u5E7D\u660E": "you - ming " "\u96CD\u6B63": "Yongzheng " "\u6C38\uF914": "Yongle " "\u64C1\u585E": "yong se " @@ -1308,6 +1304,8 @@ script_to_roman: "\u9690\u6CA1": "yin mo " "\u6A83\u681D": "yin kuo " "\u7AA8\u4E95": "yin jing " + "\u9670\u7B4A": "yin jiao " + "\u9634\u7B4A": "yin jiao " "\u94F6\u884C": "yin hang " "\u9280\u884C": "yin hang " "\u9670\u4E7E": "yin gan " @@ -1369,8 +1367,6 @@ script_to_roman: "\u6CBF\u7740": "yan zhe " "\u9A8C\u8840": "yan xie " "\u9A57\u8840": "yan xie " - "\u814C\u9E79": "yan xian " - "\u814C\u54B8": "yan xian " "\u53AD\u60E1": "yan wu " "\u538C\u6076": "yan wu " "\u94C5\u5C71": "yan shan " @@ -1413,6 +1409,7 @@ script_to_roman: "\u5671\u982D": "xue tou " "\u5671\u5934": "xue tou " "\u8840\u585E": "xue se " + "\u8840\u6A59": "xue cheng " "\u5BA3\u7D71": "Xuantong " "\u5BA3\u5FB7": "Xuande " "\u55A7\u56A3": "xuan xiao " @@ -1455,8 +1452,6 @@ script_to_roman: "\u85AA\u7ED9": "xin ji " "\u85AA\u7D66": "xin ji " "\u4FE1\u5DEE": "xin chai " - "\u65B0\u9A6C": "xin - ma " - "\u65B0\u99AC": "xin - ma " "\u5E0C\u81D8": "Xila " "\u5E0C\u814A": "Xila " "\u89E3\u5ECC": "xie zhi " @@ -1479,6 +1474,7 @@ script_to_roman: "\u8096\u4F3C": "xiao si " "\u5C0F\u4E58": "xiao sheng " "\u524A\u7403": "xiao qiu " + "\u7B11\u7B4A": "xiao jiao " "\u524A\u5C16": "xiao jian " "\u9E79\u967D": "Xianyang " "\u9E79\u9633": "Xianyang " 
@@ -1528,6 +1524,10 @@ script_to_roman: "\u9592\u6643": "xian huang " "\u9592\u8A71": "xian hua " "\u9592\u626F": "xian che " + "\u9C9C\u5265": "xian bao " + "\u9BAE\u525D": "xian bao " + "\u73FE\u525D": "xian bao " + "\u73B0\u5265": "xian bao " "\u5EC8\u9580": "Xiamen " "\u53A6\u95E8": "Xiamen " "\u4E0B\u8C03": "xia tiao " @@ -1573,10 +1573,13 @@ script_to_roman: "\u4E4C\u4EC0": "wu shi " "\u5348\u89C9": "wu jiao " "\u5348\u89BA": "wu jiao " + "\u60E1\u6068": "wu hen " + "\u6076\u6068": "wu hen " "\u65E0\u988C": "wu he " "\u4E94\u4EE3": "Wu dai " "\u5434\u5821": "wu bu " "\u5433\u5821": "wu bu " + "\u63E1\u722A": "wo zhua " "\u6EAB\u5DDE": "Wenzhou " "\u6E29\u5DDE": "Wenzhou " "\u6C76\u840A": "Wenlai " @@ -1614,6 +1617,7 @@ script_to_roman: "\u5916\u4F20": "wai zhuan " "\u5916\u884C": "wai hang " "\u6316\u89D2": "wa jue " + "\u62D6\u62FD": "tuo zhuai " "\u62D3\u5C55": "tuo zhan " "\u62D6\u6C93": "tuo ta " "\u62D3\u64B2": "tuo pu " @@ -1643,12 +1647,12 @@ script_to_roman: "\u5F92\u88FC": "tu xi " "\u6295\u964D": "tou xiang " "\u5077\u9592": "tou xian " + "\u6295\u884C": "tou hang " "\u540C\u6CBB": "Tongzhi " "\u75DB\u60E1": "tong wu " "\u75DB\u6076": "tong wu " "\u7EDF\u7387": "tong shuai " "\u7D71\u7387": "tong shuai " - "\u901A\u4EC0": "tong shi " "\u540C\u884C": "tong hang " "\u505C\u6CCA": "ting bo " "\u8C03\u5634": "tiao zui " @@ -1758,7 +1762,6 @@ script_to_roman: "\u5929\u555F": "Tianqi " "\u5929\u6D25": "Tianjin " "\u586B\u585E": "tian se " - "\u817C\u8138": "tian lian " "\u5929\u548C": "tian hu " "\u751C\u6A59": "tian cheng " "\u5929\u957F": "tian chang " @@ -1854,6 +1857,9 @@ script_to_roman: "\u6CF0\u963F": "tai e " "\u592A\u963F": "tai e " "\u5854\u624E": "ta za " + "\u55D2\u55AA": "ta sang " + "\u55D2\u4E27": "ta sang " + "\u55D2\u7136": "ta ran " "\u5854\u52D2": "ta le " "\u5854\u524E": "ta cha " "\u5854\u5239": "ta cha " @@ -1874,6 +1880,9 @@ script_to_roman: "\u968F\u7740": "sui zhe " "\u5C3F\u812C": "sui pao " "\u5C3F\u6CE1": "sui pao " + 
"\u7EE5\u68F1": "sui ling " + "\u7D8F\u7A1C": "sui ling " + "\u849C\u7CDC": "suan mi " "\u7B97\u4E86": "suan le " "\u9178\u6A59": "suan cheng " "\u9001\u9084": "song huan " @@ -1901,6 +1910,8 @@ script_to_roman: "\u987A\u7740": "shun zhe " "\u9806\u8457": "shun zhe " "\U000251A7\u606F": "shun xi " + "\u6C34\u8457": "shui zhuo " + "\u6C34\u7740": "shui zhuo " "\u7761\u8457": "shui zhao " "\u7761\u7740": "shui zhao " "\u8BF4\u5BA2": "shui ke " @@ -1914,6 +1925,7 @@ script_to_roman: "\u6813\u585E": "shuan se " "\u7387\u76F4": "shuai zhi " "\u7387\u771F": "shuai zhen " + "\u7387\u6027": "shuai xing " "\u7387\u5148": "shuai xian " "\u7387\u7136": "shuai ran " "\u7387\u9886": "shuai ling " @@ -1956,8 +1968,6 @@ script_to_roman: "\u6642\u9577": "shi chang " "\u65F6\u957F": "shi chang " "\u4EC0\u83DC": "shi cai " - "\u4E16\u94F6": "shi - yin " - "\u4E16\u9280": "shi - yin " "\u6DF1\u5733": "Shenzhen " "\u700B\u967D": "Shenyang " "\u6E16\u967D": "Shenyang " @@ -1974,6 +1984,8 @@ script_to_roman: "\u52DD\u4F3C": "sheng si " "\u8072\u5436": "sheng na " "\u58F0\u5450": "sheng na " + "\u8056\u7B4A": "sheng jiao " + "\u5723\u7B4A": "sheng jiao " "\u751F\u9084": "sheng huan " "\u751F\u8FD8": "sheng huan " "\u8EAB\u8457": "shen zhuo " @@ -2029,7 +2041,6 @@ script_to_roman: "\u4E0A\u8272": "shang shai " "\u4E0A\u988C": "shang he " "\u5546\u884C": "shang hang " - "\u4E0A\u5408": "shang - he " "\u5C71\u6771": "Shandong " "\u5C71\u4E1C": "Shandong " "\u5C71\u6942": "shan zha " @@ -2071,14 +2082,13 @@ script_to_roman: "\u5112\u85CF": "ru zang " "\u5165\u884C": "ru hang " "\u8089\u7CDC": "rou mi " + "\u8089\u4E7E": "rou gan " "\u8089\u812F": "rou fu " "\u6EB6\u6CA1": "rong mo " "\u6EB6\u6C92": "rong mo " "\u620E\u884C": "rong hang " "\u5197\u957F": "rong chang " "\u5197\u9577": "rong chang " - "\u8363\u603B": "rong - zong " - "\u69AE\u7E3D": "rong - zong " "\u65E5\u672C": "Riben " "\u65E5\u6CA1": "ri mo " "\u65E5\u6C92": "ri mo " @@ -2087,6 +2097,7 @@ script_to_roman: "\u4EBA\u53C2": 
"ren shen " "\u6041\u9EBC": "ren me " "\u6041\u4E48": "ren me " + "\u4EBA\u884C": "ren hang " "\u6041\u822C": "ren ban " "\u71B1\u8ECB": "re zha " "\u70ED\u8F67": "re zha " @@ -2096,6 +2107,8 @@ script_to_roman: "\u9E87\u96C6": "qun ji " "\u7094\u70F4": "que ting " "\u7094\u70C3": "que ting " + "\u7F3A\u89C9": "que jiao " + "\u7F3A\u89BA": "que jiao " "\u828D\u9642": "que bei " "\u6CC9\u5DDE": "Quanzhou " "\u5168\u50B3": "quan zhuan " @@ -2103,6 +2116,7 @@ script_to_roman: "\u5168\u90FD": "quan dou " "\u5168\u957F": "quan chang " "\u5168\u9577": "quan chang " + "\u53BB\u722A": "qu zhua " "\u710C\u6CB9": "qu you " "\u77BF\u66C7": "qu tan " "\u77BF\u6619": "qu tan " @@ -2123,6 +2137,7 @@ script_to_roman: "\u6E05\u9592": "qing xian " "\u8F7B\u7387": "qing shuai " "\u8F15\u7387": "qing shuai " + "\u60C5\u52D2": "qing le " "\u7F44\u5331": "qing kui " "\u89AA\u5BB6": "qing jia " "\u4EB2\u5BB6": "qing jia " @@ -2159,6 +2174,7 @@ script_to_roman: "\u524D\u4F20": "qian zhuan " "\u728D\u70BA": "qian wei " "\u728D\u4E3A": "qian wei " + "\u7EA4\u624B": "qian shou " "\u6F5C\u6CA1": "qian mo " "\u6F5B\u6C92": "qian mo " "\u4E7E\u5609": "Qian Jia " @@ -2173,6 +2189,7 @@ script_to_roman: "\u88B7\u88A2": "qia pan " "\u5361\u76E4": "qia pan " "\u5361\u76D8": "qia pan " + "\u5361\u6263": "qia kou " "\u5361\u6BBC": "qia ke " "\u5361\u58F3": "qia ke " "\u5668\u6A02": "qi yue " @@ -2188,6 +2205,7 @@ script_to_roman: "\u57D4\u9E7D": "pu yan " "\u57D4\u76D0": "pu yan " "\u57D4\u5FC3": "pu xin " + "\u84B2\U0002B4F4": "pu mou " "\u57D4\u91CC": "pu li " "\u6734\u785D": "po xiao " "\u8FEB\u964D": "po xiang " @@ -2200,6 +2218,8 @@ script_to_roman: "\u8A55\u50B3": "ping zhuan " "\u6191\u8457": "ping zhe " "\u51ED\u7740": "ping zhe " + "\u8BC4\u5F39": "ping tan " + "\u8A55\u5F48": "ping tan " "\u6191\u85C9": "ping jie " "\u51ED\u85C9": "ping jie " "\u62DA\u8D34": "pin tie " @@ -2231,6 +2251,7 @@ script_to_roman: "\u70F9\u8C03": "peng tiao " "\u70F9\u8ABF": "peng tiao " "\u6367\u89D2": "peng jue 
" + "\u699C\u7B1E": "peng chi " "\u5674\u8584": "pen bo " "\u55B7\u8584": "pen bo " "\u914D\u6A02": "pei yue " @@ -2280,20 +2301,23 @@ script_to_roman: "\u6B50\u6942": "ou zha " "\u6B27\u6942": "ou zha " "\u543D\u7259": "ou ya " + "\u543D\u5440": "ou ya " + "\u6B50\u4E86": "ou le " + "\u6B27\u4E86": "ou le " "\u6B50\u6CCA": "ou bo " "\u6B27\u6CCA": "ou bo " - "\u6B50\u7F8E": "ou - mei " - "\u6B27\u7F8E": "ou - mei " "\u5973\u7EA2": "nü gong " "\u5973\u7D05": "nü gong " "\u632A\u5A01": "Nuowei " "\u8BFA\u9C81": "Nuolu " "\u8AFE\u9B6F": "Nuolu " "\u6696\u548C": "nuan huo " + "\u8FB2\u9592": "nong xian " "\u7EBD\u7EA6": "Niuyue " "\u7D10\u7D04": "Niuyue " "\u725B\u6D25": "Niujin " "\u725B\u4ED4": "niu zai " + "\u725B\u6252": "niu pa " "\u725B\u0062": "niu bi " "\u5C3C\u65E5": "Niri " "\u5BE7\u590F": "Ningxia " @@ -2307,6 +2331,7 @@ script_to_roman: "\u9CE5\u86E4": "niao ge " "\u9ECF\u8457": "nian zhuo " "\u9ECF\u7740": "nian zhuo " + "\u8F3E\u8ECB": "nian ya " "\u7C98\u7CEF": "nian nuo " "\u7C98\u9023": "nian lian " "\u7C98\u8FDE": "nian lian " @@ -2342,6 +2367,8 @@ script_to_roman: "\u96E3\u5F48": "nan tan " "\u96BE\u5F39": "nan tan " "\u5357\u5B8B": "Nan Song " + "\u5357\u957F": "nan chang " + "\u5357\u9577": "nan chang " "\u5976\u62FD": "nai zhuai " "\u7EB3\u964D": "na xiang " "\u7D0D\u964D": "na xiang " @@ -2406,6 +2433,7 @@ script_to_roman: "\u82D7\u6817": "Miaoli " "\u7F05\u7538": "Miandian " "\u7DEC\u7538": "Miandian " + "\u9766\u89A5": "mian tian " "\u9766\u8146": "mian tian " "\u9EB5\u7684": "mian di " "\u9762\u7684": "mian di " @@ -2413,6 +2441,7 @@ script_to_roman: "\u7DBF\u9577": "mian chang " "\u7EF5\u8584": "mian bo " "\u7DBF\u8584": "mian bo " + "\u8C1C\u4E4B": "mi zhi " "\u8C1C\u8BED": "mi yu " "\u8C1C\u6837": "mi yang " "\u8C1C\u56E2": "mi tuan " @@ -2422,6 +2451,7 @@ script_to_roman: "\u5F25\u52D2": "mi le " "\u7CDC\u721B": "mi lan " "\u7CDC\u70C2": "mi lan " + "\u7955\u8A23": "mi jue " "\u7955\u7D50": "mi jie " "\u7C73\u82BE": "mi fu " 
"\u7CDC\u8D39": "mi fei " @@ -2489,6 +2519,8 @@ script_to_roman: "\u5452\u5565": "m sha " "\u5638\u6C92": "m mei " "\u5452\u6CA1": "m mei " + "\u59C6\u5ABD": "m ma " + "\u59C6\u5988": "m ma " "\u6D1B\u967D": "Luoyang " "\u6D1B\u9633": "Luoyang " "\u7F85\u99AC": "Luoma " @@ -2524,12 +2556,16 @@ script_to_roman: "\u9732\u5BCC": "lou fu " "\u9732\u98CE": "lou feng " "\u9732\u98A8": "lou feng " + "\u9732\u9EDE": "lou dian " + "\u9732\u70B9": "lou dian " "\u9732\u5E95": "lou di " "\u9732\u919C": "lou chou " "\u9732\u4E11": "lou chou " "\u9732\u80CC": "lou bei " "\u9732\u767D": "lou bai " "\u9686\u6176": "Longqing " + "\u9F99\u5729": "long xu " + "\u9F8D\u5729": "long xu " "\u5F04\u5802": "long tang " "\u7C60\u6ABB": "long jian " "\u7B3C\u69DB": "long jian " @@ -2566,6 +2602,8 @@ script_to_roman: "\u7CE7\u884C": "liang hang " "\u7CAE\u884C": "liang hang " "\u667E\u4E7E": "liang gan " + "\u5169\u91CD": "liang chong " + "\u4E24\u91CD": "liang chong " "\u5229\u5179": "li zi " "\u7ACB\u50B3": "li zhuan " "\u7ACB\u4F20": "li zhuan " @@ -2576,6 +2614,7 @@ script_to_roman: "\u529B\u62D3": "li tuo " "\u91CC\u5F04": "li long " "\u674E\u9002": "li kuo " + "\u7ACB\u7B4A": "li jiao " "\u7282\u976C": "li jian " "\u7281\u976C": "li jian " "\u701D\u4E7E": "li gan " @@ -2651,6 +2690,7 @@ script_to_roman: "\u82E6\u53C2": "ku shen " "\u67AF\u4E7E": "ku gan " "\u82E6\u5DEE": "ku chai " + "\u6010\u6117": "kou mao " "\u4F5D\u7780": "kou mao " "\u53E3\u89D2": "kou jue " "\u5B54\u5B50": "Kongzi " @@ -2700,15 +2740,20 @@ script_to_roman: "\u5580\u5693": "ka cha " "\u5494\u5693": "ka cha " "\u006B\u4ED4": "k zai " + "\u8ECD\u6A02": "jun yue " + "\u519B\u4E50": "jun yue " "\u96CB\u8B7D": "jun yu " "\u96BD\u8A89": "jun yu " "\u9F9F\u88C2": "jun lie " "\u9F9C\u88C2": "jun lie " "\u89D2\u9010": "jue zhu " "\u89D2\u8272": "jue se " + "\u8173\u8272": "jue se " + "\u811A\u8272": "jue se " "\u89D2\u529B": "jue li " "\u7EDD\u4E86": "jue le " "\u7D55\u4E86": "jue le " + "\u89D2\u53E3": "jue kou " 
"\u5014\u5F3A": "jue jiang " "\u5014\u5F37": "jue jiang " "\u89D2\u5993": "jue ji " @@ -2791,9 +2836,9 @@ script_to_roman: "\u57FA\u9686": "Jilong " "\u5409\u6797": "Jilin " "\u85C9\u8457": "jie zhe " - "\u85C9\u7740": "jie zhe " "\u63A5\u8457": "jie zhe " "\u63A5\u7740": "jie zhe " + "\u501F\u7740": "jie zhe " "\u7ED3\u624E": "jie za " "\u85C9\u7531": "jie you " "\u85C9\u4EE5": "jie yi " @@ -2809,7 +2854,6 @@ script_to_roman: "\u6854\u6897": "jie geng " "\u6854\u69D4": "jie gao " "\u85C9\u6B64": "jie ci " - "\u63A5\u9001": "jie - song " "\u57FA\u7763": "Jidu " "\u5609\u7FA9": "Jiayi " "\u5609\u4E49": "Jiayi " @@ -2835,6 +2879,8 @@ script_to_roman: "\u53EB\u56A3": "jiao xiao " "\u4EA4\u60E1": "jiao wu " "\u4EA4\u6076": "jiao wu " + "\u56BC\u982D": "jiao tou " + "\u56BC\u5934": "jiao tou " "\u6821\u91CB": "jiao shi " "\u6821\u91CA": "jiao shi " "\u56BC\u820C": "jiao she " @@ -2938,6 +2984,8 @@ script_to_roman: "\u9E21\u6252": "ji pa " "\u96DE\u6252": "ji pa " "\u5947\u5076": "ji ou " + "\u7C4D\u6CA1": "ji mo " + "\u7C4D\u6C92": "ji mo " "\u6280\u5006": "ji liang " "\u6280\u4FE9": "ji liang " "\u4F0E\u5006": "ji liang " @@ -2956,7 +3004,6 @@ script_to_roman: "\u7E6B\u6CCA": "ji bo " "\u7CFB\u6CCA": "ji bo " "\u7620\u8584": "ji bo " - "\u0069\u4EBA": "i - ren " "\u6D3B\u8457": "huo zhe " "\u6D3B\u7740": "huo zhe " "\u6D3B\u4F3C": "huo si " @@ -2995,6 +3042,8 @@ script_to_roman: "\u9084\u539F": "huan yuan " "\u8FD8\u613F": "huan yuan " "\u8FD8\u539F": "huan yuan " + "\u9084\u967D": "huan yang " + "\u8FD8\u9633": "huan yang " "\u9084\u9109": "huan xiang " "\u8FD8\u4E61": "huan xiang " "\u9084\u5E2D": "huan xi " @@ -3052,6 +3101,8 @@ script_to_roman: "\u5FFD\u5730": "hu de " "\u5F27\u957F": "hu chang " "\u5F27\u9577": "hu chang " + "\u5F8C\u50B3": "hou zhuan " + "\u540E\u4F20": "hou zhuan " "\u539A\u6734": "hou po " "\u539A\u8584": "hou bo " "\u5F18\u6CBB": "Hongzhi " @@ -3087,6 +3138,7 @@ script_to_roman: "\u5475\u62A4": "he hu " "\u5475\u559D": "he he " "\u5475\u5475": 
"he he " + "\u6CB3\u6D8C": "he chong " "\u5475\u65A5": "he chi " "\u5475\u53F1": "he chi " "\u597D\u60E1": "hao wu " @@ -3100,6 +3152,8 @@ script_to_roman: "\u884C\u9577": "hang zhang " "\u884C\u68E7": "hang zhan " "\u884C\u6808": "hang zhan " + "\u884C\u54E1": "hang yuan " + "\u884C\u5458": "hang yuan " "\u884C\u8BED": "hang yu " "\u884C\u8A9E": "hang yu " "\u884C\u696D": "hang ye " @@ -3136,6 +3190,7 @@ script_to_roman: "\u884C\u5F53": "hang dang " "\u884C\u8F88": "hang bei " "\u884C\u8F29": "hang bei " + "\u310F\u3124": "hang " "\u6F22\u5821": "Hanbao " "\u6C49\u5821": "Hanbao " "\u542B\u62EC": "han gua " @@ -3152,6 +3207,7 @@ script_to_roman: "\u56FD\u4E50": "guo yue " "\u6E26\u967D": "guo yang " "\u6DA1\u9633": "guo yang " + "\u679C\u4E7E": "guo gan " "\u679C\u812F": "guo fu " "\u8D35\u5DDE": "Guizhou " "\u8CB4\u5DDE": "Guizhou " @@ -3201,7 +3257,6 @@ script_to_roman: "\u516C\u77F3": "gong dan " "\u516C\u5DEE": "gong chai " "\u55CA\u5425": "gong bu " - "\u516C\u5A46": "gong - po " "\u66F4\u5352": "geng zu " "\u6897\u54BD": "geng ye " "\u54FD\u54BD": "geng ye " @@ -3245,6 +3300,7 @@ script_to_roman: "\u5E72\u54D5": "gan yue " "\u4E7E\u5666": "gan yue " "\u8D1B\u8A9E": "gan yu " + "\u8D1B\u6986": "gan yu " "\u4E7E\u8863": "gan yi " "\u4E7E\u8449": "gan ye " "\u4E7E\u766C": "gan xuan " @@ -3305,6 +3361,7 @@ script_to_roman: "\u621B\u7EB3": "ga na " "\u621B\u7D0D": "ga na " "\u5496\u55B1": "ga li " + "\u560E\u4E86": "ga le " "\u798F\u5DDE": "Fuzhou " "\u798F\u5EFA": "Fujian " "\u9644\u8457": "fu zhuo " @@ -3317,7 +3374,6 @@ script_to_roman: "\u6D6E\u8584": "fu bo " "\u5FA9\u8F9F": "fu bi " "\u590D\u8F9F": "fu bi " - "\u8D74\u7F8E": "fu - mei " "\u82AC\u862D": "Fenlan " "\u82AC\u5170": "Fenlan " "\u8702\u87AB": "feng zhe " @@ -3335,6 +3391,8 @@ script_to_roman: "\u7793\u89C9": "fen jiao " "\u7793\u89BA": "fen jiao " "\u5206\u884C": "fen hang " + "\u98DE\u631D": "fei zhua " + "\u98DB\u64BE": "fei zhua " "\u975E\u6D32": "Fei Zhou " "\u975E\u5F97": "fei dei " 
"\u80A5\u5DEE": "fei chai " @@ -3345,6 +3403,7 @@ script_to_roman: "\u65B9\u7565": "fang lüe " "\u653E\u9084": "fang huan " "\u653E\u8FD8": "fang huan " + "\u5F77\u4F5B": "fang fu " "\u4EFF\u4F5B": "fang fu " "\u8303\u7E1D": "fan zhen " "\u53CD\u7701": "fan xing " @@ -3383,7 +3442,6 @@ script_to_roman: "\u963F\u80F6": "e jiao " "\u963F\u9644": "e fu " "\u963F\u5835": "e du " - "\u0065\u4EBA": "e - ren " "\u9A6E\u5B50": "duo zi " "\u99B1\u5B50": "duo zi " "\u591A\u91CD": "duo chong " @@ -3457,6 +3515,7 @@ script_to_roman: "\u5F97\u8981": "dei yao " "\u5F97\u8667": "dei kui " "\u5F97\u4E8F": "dei kui " + "\u5F97\u770B": "dei kan " "\u5FB7\u570B": "Deguo " "\u5FB7\u56FD": "Deguo " "\u5F97\u8457": "de zhao " @@ -3476,10 +3535,10 @@ script_to_roman: "\u6C39\u4ED4": "dang zai " "\u9EE8\u53C3": "dang shen " "\u515A\u53C2": "dang shen " + "\u81BB\u4E2D": "dan zhong " "\u4E39\u53C3": "dan shen " "\u4E39\u53C2": "dan shen " "\u65E6\u89D2": "dan jue " - "\u77F3\u9F13": "dan gu " "\u55AE\u927B": "dan ge " "\u8CE7\u4F5B": "dan fo " "\u6FB9\u6CCA": "dan bo " @@ -3510,6 +3569,8 @@ script_to_roman: "\u6512\u7C07": "cuan cu " "\u7C97\u7387": "cu shuai " "\u7EF0\u7EA6": "chuo yue " + "\u5A65\u7EA6": "chuo yue " + "\u5A65\u7D04": "chuo yue " "\u555C\u996E": "chuo yin " "\u555C\u98F2": "chuo yin " "\u555C\u6CE3": "chuo qi " @@ -3546,6 +3607,8 @@ script_to_roman: "\u91CD\u6574": "chong zheng " "\u91CD\u632F": "chong zhen " "\u91CD\u9020": "chong zao " + "\u91CD\u8F7D": "chong zai " + "\u91CD\u8F09": "chong zai " "\u91CD\u53C8": "chong you " "\u91CD\u6620": "chong ying " "\u91CD\u5F71": "chong ying " @@ -3610,6 +3673,7 @@ script_to_roman: "\u91CD\u9ECE": "chong li " "\u91CD\u6765": "chong lai " "\u91CD\u4F86": "chong lai " + "\u91CD\u520A": "chong kan " "\u91CD\u958B": "chong kai " "\u91CD\u5F00": "chong kai " "\u91CD\u805A": "chong ju " @@ -3687,11 +3751,13 @@ script_to_roman: "\u68A3\u6811": "chen shu " "\u7A31\u9322": "chen qian " "\u79F0\u94B1": "chen qian " + "\u6C88\u6EBA": "chen 
ni " "\u6C89\u6CA1": "chen mo " "\u6C89\u6C92": "chen mo " "\u6C89\u75FE": "chen ke " "\u7A31\u591A": "chen duo " "\u79F0\u591A": "chen duo " + "\u6C88\u6FB1": "chen dian " "\u546B\u5685": "che ru " "\u8F66\u884C": "che hang " "\u8ECA\u884C": "che hang " @@ -3712,16 +3778,22 @@ script_to_roman: "\u9577\u5B89": "Chang'an " "\u957F\u8DB3": "chang zu " "\u9577\u8DB3": "chang zu " + "\u957F\u6D32": "chang zhou " + "\u9577\u6D32": "chang zhou " "\u957F\u81F3": "chang zhi " "\u957F\u6CBB": "chang zhi " "\u9577\u81F3": "chang zhi " "\u9577\u6CBB": "chang zhi " "\u957F\u5F81": "chang zheng " "\u9577\u5F81": "chang zheng " + "\u957F\u9488": "chang zhen " + "\u9577\u91DD": "chang zhen " "\u957F\u7167": "chang zhao " "\u9577\u7167": "chang zhao " "\u957F\u8FDC": "chang yuan " + "\u957F\u57A3": "chang yuan " "\u9577\u9060": "chang yuan " + "\u9577\u57A3": "chang yuan " "\u957F\u4E8E": "chang yu " "\u9577\u65BC": "chang yu " "\u957F\u6905": "chang yi " @@ -3732,6 +3804,8 @@ script_to_roman: "\u957F\u591C": "chang ye " "\u9577\u91CE": "chang ye " "\u9577\u591C": "chang ye " + "\u957F\u9633": "chang yang " + "\u9577\u967D": "chang yang " "\u5018\u4F6F": "chang yang " "\u957F\u70DF": "chang yan " "\u9577\u7159": "chang yan " @@ -3767,6 +3841,8 @@ script_to_roman: "\u957F\u53F9": "chang tan " "\u9577\u8AC7": "chang tan " "\u9577\u5606": "chang tan " + "\u957F\u6CF0": "chang tai " + "\u9577\u6CF0": "chang tai " "\u957F\u987A": "chang shun " "\u9577\u9806": "chang shun " "\u957F\u5BFF": "chang shou " @@ -3792,8 +3868,10 @@ script_to_roman: "\u9577\u88D9": "chang qun " "\u957F\u62F3": "chang quan " "\u9577\u62F3": "chang quan " + "\u957F\u9752": "chang qing " "\u957F\u6E05": "chang qing " "\u957F\u60C5": "chang qing " + "\u9577\u9752": "chang qing " "\u9577\u6E05": "chang qing " "\u9577\u60C5": "chang qing " "\u957F\u67AA": "chang qiang " @@ -3832,6 +3910,8 @@ script_to_roman: "\u9577\u6A02": "chang le " "\u957F\u5ECA": "chang lang " "\u9577\u5ECA": "chang lang " + "\u957F\u5BBD": 
"chang kuan " + "\u9577\u5BEC": "chang kuan " "\u957F\u88E4": "chang ku " "\u9577\u8932": "chang ku " "\u957F\u7A7A": "chang kong " @@ -3880,8 +3960,6 @@ script_to_roman: "\u9577\u7B1B": "chang di " "\u957F\u51F3": "chang deng " "\u9577\u51F3": "chang deng " - "\u957F\u5FB7": "chang de " - "\u9577\u5FB7": "chang de " "\u957F\u8FBE": "chang da " "\u9577\u9054": "chang da " "\u957F\u5B58": "chang cun " @@ -3903,7 +3981,9 @@ script_to_roman: "\u957F\u7F16": "chang bian " "\u9577\u7DE8": "chang bian " "\u957F\u73ED": "chang ban " + "\u957F\u677F": "chang ban " "\u9577\u73ED": "chang ban " + "\u9577\u677F": "chang ban " "\u957F\u767D": "chang bai " "\u9577\u767D": "chang bai " "\u957F\u6309": "chang an " @@ -3997,9 +4077,10 @@ script_to_roman: "\u8584\u9727": "bo wu " "\u8584\u96FE": "bo wu " "\u6CCA\u4F4D": "bo wei " + "\u6CCA\u982D": "bo tou " + "\u6CCA\u5934": "bo tou " "\u8584\u7530": "bo tian " "\u6CCA\u677E": "bo song " - "\u767E\u8272": "bo se " "\u8584\u5F31": "bo ruo " "\u822C\u82E5": "bo re " "\u8584\u60C5": "bo qing " @@ -4023,8 +4104,12 @@ script_to_roman: "\u8584\u539A": "bo hou " "\u8584\u8377": "bo he " "\u8584\u6D77": "bo hai " + "\u8116\u9888": "bo geng " + "\u8116\u9838": "bo geng " + "\u4F2F\u90FD": "bo dou " "\u8584\u5730": "bo di " "\u8584\u5F85": "bo dai " + "\u6CCA\u8239": "bo chuan " "\u8584\u916C": "bo chou " "\u6CCA\u8F66": "bo che " "\u6CCA\u8ECA": "bo che " @@ -37206,6 +37291,7 @@ script_to_roman: "\u38AC": "hong " "\u35E2": "hong " "\u3593": "hong " + "\u5677": "hm " "\U0002A3D3": "heng " "\U0002966F": "heng " "\U00027752": "heng " @@ -37256,7 +37342,6 @@ script_to_roman: "\u62EB": "hen " "\u6068": "hen " "\u5F88": "hen " - "\u5677": "hen " "\u4F77": "hen " "\u44F3": "hen " "\u3BCA": "hen " @@ -37656,6 +37741,7 @@ script_to_roman: "\u402A": "hang " "\u3C20": "hang " "\u391A": "hang " + "\u310F": "hang " "\U00031F1E": "han " "\U00031276": "han " "\U0003124B": "han " @@ -40213,7 +40299,6 @@ script_to_roman: "\U00025F48": "feng " "\U00025DDC": 
"feng " "\U000256DD": "feng " - "\U0002536E": "feng " "\U00025292": "feng " "\U00024580": "feng " "\U00023FDD": "feng " @@ -40254,7 +40339,6 @@ script_to_roman: "\u7D98": "feng " "\u7BC8": "feng " "\u78B8": "feng " - "\u781C": "feng " "\u76FD": "feng " "\u760B": "feng " "\u75AF": "feng " @@ -40262,6 +40346,7 @@ script_to_roman: "\u7412": "feng " "\u7326": "feng " "\u728E": "feng " + "\u7148": "feng " "\u7128": "feng " "\u70FD": "feng " "\u7043": "feng " @@ -40281,15 +40366,10 @@ script_to_roman: "\u5CF0": "feng " "\u5CEF": "feng " "\u5C01": "feng " - "\u5BF7": "feng " "\u59A6": "feng " - "\u5949": "feng " "\u5906": "feng " - "\u5838": "feng " - "\u552A": "feng " "\u51EE": "feng " "\u51EC": "feng " - "\u51E8": "feng " "\u51E4": "feng " "\u50FC": "feng " "\u5051": "feng " @@ -40300,20 +40380,26 @@ script_to_roman: "\u47EA": "feng " "\u465C": "feng " "\u45EC": "feng " - "\u44A0": "feng " "\u43CE": "feng " - "\u4031": "feng " "\u3DED": "feng " "\u3D6F": "feng " "\u3980": "feng " "\u385D": "feng " "\u3826": "feng " "\u3702": "feng " - "\u51AF": "Feng " - "\u36D4": "feng " "\u34A5": "feng " "\u99AE": "Feng " - "\u7148": "feng " + "\u51AF": "Feng " + "\U0002536E": "feng " + "\u781C": "feng " + "\u5BF7": "feng " + "\u5949": "feng " + "\u5838": "feng " + "\u552A": "feng " + "\u51E8": "feng " + "\u44A0": "feng " + "\u4031": "feng " + "\u36D4": "feng " "\u343D": "feng " "\U0003162E": "fen " "\U000312F1": "fen "