comparison mupdf-source/thirdparty/harfbuzz/src/ms-use/IndicSyllabicCategory-Additional.txt @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 # Override values For Indic_Syllabic_Category
2 # Not derivable
3 # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
4 # Updated for Unicode 10.0 by Andrew Glass 2017-07-25
5 # Updated for Unicode 12.1 by Andrew Glass 2019-05-24
6 # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
7 # Updated for Unicode 14.0 by Andrew Glass 2021-09-25
8 # Updated for Unicode 15.0 by Andrew Glass 2022-09-16
9
10 # ================================================
11 # OVERRIDES TO ASSIGNED VALUES
12 # ================================================
13
14 # Indic_Syllabic_Category=Bindu
15 193A ; Bindu # Mn LIMBU SIGN KEMPHRENG
16 AA29 ; Bindu # Mn CHAM VOWEL SIGN AA
17 10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
18
19 # ================================================
20
21 # Indic_Syllabic_Category=Consonant
22 19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant
23 25CC ; Consonant # So DOTTED CIRCLE #Reassigned to allow it to cluster as a generic base
24
25 # ================================================
26
27 # Indic_Syllabic_Category=Consonant_Dead
28 0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga can form an independent cluster, but see #19
29
30 # ================================================
31
32 # Indic_Syllabic_Category=Consonant_Final_Modifier
33 1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN
34
35 # ================================================
36
37 # Indic_Syllabic_Category=Gemination_Mark
38 11134 ; Gemination_Mark # Mc CHAKMA MAAYYAA
39
40 # ================================================
41
42 # Indic_Syllabic_Category=Nukta
43 0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22
44 1BF2..1BF3 ; Nukta # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
45
46 # ================================================
47
48 # Indic_Syllabic_Category=Tone_Mark
49 1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN
50 1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
51
52 # ================================================
53
54 # Indic_Syllabic_Category=Vowel_Independent
55 AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
56 AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
57 AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
58
59 # ================================================
60 # ================================================
61 # VALUES NOT ASSIGNED IN Indic_Syllabic_Category
62 # ================================================
63 # ================================================
64
65 # Indic_Syllabic_Category=Consonant
66 0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
67 0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
68 0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
69 0F04..0F06 ; Consonant # Po [3] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
70 1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
71 1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
72 180A ; Consonant # Po MONGOLIAN NIRUGU
73 1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS
74 1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
75 2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
76 2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
77 10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
78 1057C..1058A ; Consonant # Lo [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
79 1058C..10592 ; Consonant # Lo [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
80 10594..10595 ; Consonant # Lo [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
81 10597..105A1 ; Consonant # Lo [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
82 105A3..105B1 ; Consonant # Lo [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
83 105B3..105B9 ; Consonant # Lo [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
84 105BB..105BC ; Consonant # Lo [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
85 10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
86 10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
87 10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
88 10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
89 10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
90 10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
91 10F70..10F81 ; Consonant # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
92 111DA ; Consonant # Lo SHARADA EKAM
93 #HIEROGLYPHS to be moved to new category
94 13000..1342F ; Consonant # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
95 #For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified.
96 13437..13438 ; Consonant # Cf [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT
97 13441..13446 ; Consonant # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
98 16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
99 16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
100 16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class
101 18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
102 1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
103 1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
104 1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
105 1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
106 1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
107 1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
108 1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
109 1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA
110 1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A
111 1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
112 1E4D0..1E4EA ; Consonant # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
113 1E4EB ; Consonant # Lm NAG MUNDARI SIGN OJOD
114 1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
115 1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
116 1E94B ; Consonant # Lm ADLAM NASALIZATION MARK
117
118 # ================================================
119
120 # Indic_Syllabic_Category=Consonant_Placeholder
121 1880..1884 ; Consonant_Placeholder # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
122
123 # ================================================
124
125 # Indic_Syllabic_Category=Gemination_Mark
126 10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI
127
128 # ================================================
129
130 # Indic_Syllabic_Category=Modifying_Letter
131 FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 # Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios
132 16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION
133
134 # ================================================
135
136 # Indic_Syllabic_Category=Nukta
137 0859..085B ; Nukta # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
138 0F39 ; Nukta # Mn TIBETAN MARK TSA -PHRU # NOW IN UNICODE 10.0
139 1885..1886 ; Nukta # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
140 18A9 ; Nukta # Mn MONGOLIAN LETTER ALI GALI DAGALGA
141 10AE5..10AE6 ; Nukta # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
142 16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR
143 1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
144 1E944..1E94A ; Nukta # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
145 10F82..10F85 ; Nukta # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
146
147 # ================================================
148
149 # Indic_Syllabic_Category=Number
150 10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
151 10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
152 16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
153 1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
154 1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
155 1E4F0..1E4F9 ; Number # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
156 1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
157
158 # ================================================
159
160 # Indic_Syllabic_Category=Tone_Mark
161 07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
162 07FD ; Tone_Mark # Mn NKO DANTAYALAN
163 0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
164 17CF ; Tone_Mark # Mn KHMER SIGN AHSDA
165 10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA
166 10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
167 16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
168 16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
169 1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
170 1E2AE ; Tone_Mark # Mn TOTO SIGN RISING TONE
171 1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
172
173 # ================================================
174
175 # Indic_Syllabic_Category=Virama
176 2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER
177 #HIEROGLYPHS to be moved to new category
178 13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
179 13439..1343B ; Virama # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
180
181 # ================================================
182
183 # Indic_Syllabic_Category=Vowel_Independent
184 AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
185 AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
186 AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
187
188 # ================================================
189
190 # Indic_Syllabic_Category=Vowel_Dependent
191 0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE
192 10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
193 16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
194 1E4EC..1E4EF ; Vowel_Dependent # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
195
196 # ================================================
197
198 # Indic_Syllabic_Category=Cantillation_Mark
199
200 1CF8..1CF9 ; Cantillation_Mark # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
201 #HIEROGLYPHS to be moved to new category
202 13440 ; Cantillation_Mark # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
203 13447..13455 ; Cantillation_Mark # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
204
205 # ================================================
206
207 # Indic_Syllabic_Category=Symbol_Modifier
208 1B6B..1B73 ; Symbol_Modifier # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
209
210 # ================================================
211 # ================================================
212 # PROPERTIES NOT ASSIGNED IN Indic_Syllabic_Category
213 # ================================================
214 # ================================================
215
216 # USE, Extended_Syllabic_Category=Hieroglyph
217 # 13000..1342F ; Hieroglyph # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
218 # 13441..13446 ; Hieroglyph # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
219
220 # ================================================
221
222 # USE, Extended_Syllabic_Category=Hieroglyph_Joiner
223 # 13430..13436 ; Hieroglyph_Joiner # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
224 # 13439..1343B ; Hieroglyph_Joiner # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
225
226 # ================================================
227
228 # USE, Extended_Syllabic_Category=Hieroglyph_Mark_Begin
229 # 005B ; Hieroglyph_Mark_Begin # Ps LEFT SQUARE BRACKET
230 # 007B ; Hieroglyph_Mark_Begin # Ps LEFT CURLY BRACKET
231 # 27E6 ; Hieroglyph_Mark_Begin # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
232 # 27E8 ; Hieroglyph_Mark_Begin # Ps MATHEMATICAL LEFT ANGLE BRACKET
233 # 2E22 ; Hieroglyph_Mark_Begin # Ps TOP LEFT HALF BRACKET
234 # 2E24 ; Hieroglyph_Mark_Begin # Ps BOTTOM LEFT HALF BRACKET
235
236 # ================================================
237
238 # USE, Extended_Syllabic_Category=Hieroglyph_Mark_End
239 # 005D ; Hieroglyph_Mark_End # Pe RIGHT SQUARE BRACKET
240 # 007D ; Hieroglyph_Mark_End # Pe RIGHT CURLY BRACKET
241 # 27E7 ; Hieroglyph_Mark_End # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
242 # 27E9 ; Hieroglyph_Mark_End # Pe MATHEMATICAL RIGHT ANGLE BRACKET
243 # 2E23 ; Hieroglyph_Mark_End # Pe TOP RIGHT HALF BRACKET
244 # 2E25 ; Hieroglyph_Mark_End # Pe BOTTOM RIGHT HALF BRACKET
245
246 # ================================================
247
248 # USE, Extended_Syllabic_Category=Hieroglyph_Segment_Begin
249 # 13437 ; Hieroglyph_Segment_Begin # Cf EGYPTIAN HIEROGLYPH BEGIN SEGMENT
250
251 # ================================================
252
253 # USE, Extended_Syllabic_Category=Hieroglyph_Segment_End
254 # 13438 ; Hieroglyph_Segment_End # Cf EGYPTIAN HIEROGLYPH END SEGMENT
255
256 # ================================================
257
258 # USE, Extended_Syllabic_Category=Hieroglyph_Mirror
259 # 13440 ; Hieroglyph_Mirror # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
260
261 # ================================================
262
263 # USE, Extended_Syllabic_Category=Hieroglyph_Modifier
264 # 13447..13455 ; Hieroglyph_Modifier # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
265
266 # ================================================
267
268 # eof