comparison mupdf-source/thirdparty/harfbuzz/src/hb-ot-shaper-use-machine.rl @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
/*
 * Copyright © 2015 Mozilla Foundation.
 * Copyright © 2015 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */
28
29 #ifndef HB_OT_SHAPER_USE_MACHINE_HH
30 #define HB_OT_SHAPER_USE_MACHINE_HH
31
32 #include "hb.hh"
33
34 #include "hb-ot-shaper-syllabic.hh"
35
/* buffer var allocations */

/* Shorthand for the per-glyph USE category accessor.  It is written after a
 * member-access dot (e.g. `i.use_category()`), so it simply renames the
 * `ot_shaper_var_u8_category()` accessor of the glyph info. */
#define use_category() ot_shaper_var_u8_category()

/* Maps a category name from the Ragel grammar (B, CGJ, ZWNJ, ...) to the
 * identifier `use_syllable_machine_ex_<Cat>`.
 * NOTE(review): those identifiers are presumably the ones Ragel emits for the
 * `export` definitions of machine `use_syllable_machine` via `write exports`
 * -- confirm against the generated header. */
#define USE(Cat) use_syllable_machine_ex_##Cat
40
/* Kind of syllable recognised by the scanner below; one enumerator per
 * scanner pattern, in the same order as the patterns of `main`.
 *
 * found_syllable() combines a value of this type with a serial number as
 * `(syllable_serial << 4) | syllable_type`, so every enumerator must stay
 * below 16 to avoid overlapping the serial bits. */
enum use_syllable_type_t {
  use_virama_terminated_cluster,
  use_sakot_terminated_cluster,
  use_standard_cluster,
  use_number_joiner_terminated_cluster,
  use_numeral_cluster,
  use_symbol_cluster,
  use_hieroglyph_cluster,
  use_broken_cluster,	/* also raises HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE */
  use_non_cluster,	/* fallback: a single category that fits no cluster */
};
52
%%{
  # Name of the state machine; Ragel prefixes generated identifiers with it.
  machine use_syllable_machine;
  # Input symbols are USE category values, read as unsigned char.
  alphtype unsigned char;
  # Emit the definitions marked `export` in the grammar section.
  write exports;
  # Emit the static data the generated scanner needs.
  write data;
}%%
59
%%{

# Categories used in the Universal Shaping Engine spec:
# https://docs.microsoft.com/en-us/typography/script-development/use
#
# Each `export` gives one category its numeric value; these are the values
# compared against use_category() through the USE() macro.

export O	= 0; # OTHER

export B	= 1; # BASE
export N	= 4; # BASE_NUM
export GB	= 5; # BASE_OTHER
export CGJ	= 6; # CGJ
export SUB	= 11; # CONS_SUB
export H	= 12; # HALANT

export HN	= 13; # HALANT_NUM
export ZWNJ	= 14; # Zero width non-joiner
export WJ	= 16; # Word joiner
export R	= 18; # REPHA
export CS	= 43; # CONS_WITH_STACKER
export IS	= 44; # INVISIBLE_STACKER
export Sk	= 48; # SAKOT
export G	= 49; # HIEROGLYPH
export J	= 50; # HIEROGLYPH_JOINER
export SB	= 51; # HIEROGLYPH_SEGMENT_BEGIN
export SE	= 52; # HIEROGLYPH_SEGMENT_END
export HVM	= 53; # HALANT_OR_VOWEL_MODIFIER

export FAbv	= 24; # CONS_FINAL_ABOVE
export FBlw	= 25; # CONS_FINAL_BELOW
export FPst	= 26; # CONS_FINAL_POST
export MAbv	= 27; # CONS_MED_ABOVE
export MBlw	= 28; # CONS_MED_BELOW
export MPst	= 29; # CONS_MED_POST
export MPre	= 30; # CONS_MED_PRE
export CMAbv	= 31; # CONS_MOD_ABOVE
export CMBlw	= 32; # CONS_MOD_BELOW
export VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
export VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
export VPst	= 35; # VOWEL_POST UIPC = Right
export VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
export VMAbv	= 37; # VOWEL_MOD_ABOVE
export VMBlw	= 38; # VOWEL_MOD_BELOW
export VMPst	= 39; # VOWEL_MOD_POST
export VMPre	= 23; # VOWEL_MOD_PRE
export SMAbv	= 41; # SYM_MOD_ABOVE
export SMBlw	= 42; # SYM_MOD_BELOW
export FMAbv	= 45; # CONS_FINAL_MOD UIPC = Top
export FMBlw	= 46; # CONS_FINAL_MOD UIPC = Bottom
export FMPst	= 47; # CONS_FINAL_MOD UIPC = Not_Applicable


# Any of the four categories that may precede a B inside consonant_modifiers.
h = H | HVM | IS | Sk;

# Building blocks of a syllable, listed in the order they are concatenated
# in complex_syllable_middle / complex_syllable_tail below.
consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*;
medial_consonants = MPre? MAbv? MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst* | H;
vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst*;
final_modifiers = FMAbv* FMBlw* | FMPst?;

# A syllable starts with a base (B or GB), optionally preceded by R or CS.
complex_syllable_start = (R | CS)? (B | GB);
complex_syllable_middle =
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	(Sk B)*
;
complex_syllable_tail =
	complex_syllable_middle
	final_consonants
	final_modifiers
;
number_joiner_terminated_cluster_tail = (HN N)* HN;
numeral_cluster_tail = (HN N)+;
symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;

# Cluster that ends in an IS.
virama_terminated_cluster_tail =
	consonant_modifiers
	IS
;
virama_terminated_cluster =
	complex_syllable_start
	virama_terminated_cluster_tail
;
# Cluster that ends in an Sk.
sakot_terminated_cluster_tail =
	complex_syllable_middle
	Sk
;
sakot_terminated_cluster =
	complex_syllable_start
	sakot_terminated_cluster_tail
;
standard_cluster =
	complex_syllable_start
	complex_syllable_tail
;
# Any tail that can follow a start; reused by broken_cluster and symbol_cluster.
tail = complex_syllable_tail | sakot_terminated_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail;
# A tail with no syllable start in front of it (only an optional R).
broken_cluster =
	R?
	(tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail)
;

number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
numeral_cluster = N numeral_cluster_tail?;
symbol_cluster = (O | GB) tail?;
hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*;
# Fallback: exactly one input symbol of any category.
other = any;

# Scanner: every pattern may be followed by one optional ZWNJ, and its action
# tags the matched range [ts, te) through found_syllable().  The broken_cluster
# action additionally sets a scratch flag on the buffer.
main := |*
	virama_terminated_cluster ZWNJ?			=> { found_syllable (use_virama_terminated_cluster); };
	sakot_terminated_cluster ZWNJ?			=> { found_syllable (use_sakot_terminated_cluster); };
	standard_cluster ZWNJ?				=> { found_syllable (use_standard_cluster); };
	number_joiner_terminated_cluster ZWNJ?		=> { found_syllable (use_number_joiner_terminated_cluster); };
	numeral_cluster ZWNJ?				=> { found_syllable (use_numeral_cluster); };
	symbol_cluster ZWNJ?				=> { found_syllable (use_symbol_cluster); };
	hieroglyph_cluster ZWNJ?			=> { found_syllable (use_hieroglyph_cluster); };
	broken_cluster ZWNJ?				=> { found_syllable (use_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; };
	other						=> { found_syllable (use_non_cluster); };
*|;


}%%
183
/* Scanner action helper: tags every glyph of the syllable just matched.
 *
 * Relies on names from the expansion site rather than on parameters:
 *   ts, te          - start / end of the current match; `.second.first` of
 *                     the dereferenced value is used as an index into `info`,
 *   info            - glyph info array that is written to,
 *   syllable_serial - running serial number, advanced here.
 *
 * Each glyph in [ts, te) gets `(syllable_serial << 4) | syllable_type`.  The
 * serial is then incremented and wraps from 16 back to 1, so it cycles
 * through 1..15 and is never reset to 0 here.
 *
 * The fprintf is disabled by `if (0)` and kept for debugging; it prints the
 * indices with %u because they are used as `unsigned` in the loop below. */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
    for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    syllable_serial++; \
    if (syllable_serial == 16) syllable_serial = 1; \
  } HB_STMT_END
192
193
194 template <typename Iter>
195 struct machine_index_t :
196 hb_iter_with_fallback_t<machine_index_t<Iter>,
197 typename Iter::item_t>
198 {
199 machine_index_t (const Iter& it) : it (it) {}
200 machine_index_t (const machine_index_t& o) : hb_iter_with_fallback_t<machine_index_t<Iter>,
201 typename Iter::item_t> (),
202 it (o.it), is_null (o.is_null) {}
203
204 static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
205 static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;
206
207 typename Iter::item_t __item__ () const { return *it; }
208 typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
209 unsigned __len__ () const { return it.len (); }
210 void __next__ () { ++it; }
211 void __forward__ (unsigned n) { it += n; }
212 void __prev__ () { --it; }
213 void __rewind__ (unsigned n) { it -= n; }
214
215 void operator = (unsigned n)
216 {
217 assert (n == 0);
218 is_null = true;
219 }
220 explicit operator bool () { return !is_null; }
221
222 void operator = (const machine_index_t& o)
223 {
224 is_null = o.is_null;
225 unsigned index = (*it).first;
226 unsigned n = (*o.it).first;
227 if (index < n) it += n - index; else if (index > n) it -= index - n;
228 }
229 bool operator == (const machine_index_t& o) const
230 { return is_null ? o.is_null : !o.is_null && (*it).first == (*o.it).first; }
231 bool operator != (const machine_index_t& o) const { return !(*this == o); }
232
233 private:
234 Iter it;
235 bool is_null = false;
236 };
237 struct
238 {
239 template <typename Iter,
240 hb_requires (hb_is_iterable (Iter))>
241 machine_index_t<hb_iter_type<Iter>>
242 operator () (Iter&& it) const
243 { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); }
244 }
245 HB_FUNCOBJ (machine_index);
246
247
248
249 static bool
250 not_ccs_default_ignorable (const hb_glyph_info_t &i)
251 { return i.use_category() != USE(CGJ); }
252
253 static inline void
254 find_syllables_use (hb_buffer_t *buffer)
255 {
256 hb_glyph_info_t *info = buffer->info;
257 auto p =
258 + hb_iter (info, buffer->len)
259 | hb_enumerate
260 | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); },
261 hb_second)
262 | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
263 {
264 if (p.second.use_category() == USE(ZWNJ))
265 for (unsigned i = p.first + 1; i < buffer->len; ++i)
266 if (not_ccs_default_ignorable (info[i]))
267 return !_hb_glyph_info_is_unicode_mark (&info[i]);
268 return true;
269 })
270 | hb_enumerate
271 | machine_index
272 ;
273 auto pe = p + p.len ();
274 auto eof = +pe;
275 auto ts = +p;
276 auto te = +p;
277 unsigned int act HB_UNUSED;
278 int cs;
279 %%{
280 write init;
281 getkey (*p).second.second.use_category();
282 }%%
283
284 unsigned int syllable_serial = 1;
285 %%{
286 write exec;
287 }%%
288 }
289
290 #undef found_syllable
291
292 #endif /* HB_OT_SHAPER_USE_MACHINE_HH */