diff mupdf-source/thirdparty/harfbuzz/src/hb-ot-shaper-use-machine.rl @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/harfbuzz/src/hb-ot-shaper-use-machine.rl	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2015  Mozilla Foundation.
+ * Copyright © 2015  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPER_USE_MACHINE_HH
+#define HB_OT_SHAPER_USE_MACHINE_HH
+
+#include "hb.hh"
+
+#include "hb-ot-shaper-syllabic.hh"
+
+/* buffer var allocations */
+#define use_category() ot_shaper_var_u8_category()
+
+#define USE(Cat) use_syllable_machine_ex_##Cat
+
+enum use_syllable_type_t { /* Syllable kinds; found_syllable() ORs the value into the low 4 bits of info[i].syllable(). */
+  use_virama_terminated_cluster, /* reported for the virama_terminated_cluster rule */
+  use_sakot_terminated_cluster, /* reported for the sakot_terminated_cluster rule */
+  use_standard_cluster, /* reported for the standard_cluster rule */
+  use_number_joiner_terminated_cluster, /* reported for the number_joiner_terminated_cluster rule */
+  use_numeral_cluster, /* reported for the numeral_cluster rule */
+  use_symbol_cluster, /* reported for the symbol_cluster rule */
+  use_hieroglyph_cluster, /* reported for the hieroglyph_cluster rule */
+  use_broken_cluster, /* reported for the broken_cluster rule, which also sets HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE */
+  use_non_cluster, /* reported for the catch-all `other` rule */
+};
+
+%%{
+  machine use_syllable_machine; # machine name; it is the prefix seen in USE()'s use_syllable_machine_ex_<Cat>
+  alphtype unsigned char; # type of the input symbols fed to the machine (the getkey value)
+  write exports; # emit the constants declared with `export` in the next section
+  write data; # emit the machine's static data
+}%%
+
+%%{
+
+# Categories used in the Universal Shaping Engine spec:
+# https://docs.microsoft.com/en-us/typography/script-development/use
+
+export O	= 0; # OTHER
+
+export B	= 1; # BASE
+export N	= 4; # BASE_NUM
+export GB	= 5; # BASE_OTHER
+export CGJ	= 6; # CGJ; find_syllables_use() filters these out before the machine runs, so no rule mentions it
+export SUB	= 11; # CONS_SUB
+export H	= 12; # HALANT
+
+export HN	= 13; # HALANT_NUM
+export ZWNJ	= 14; # Zero width non-joiner; optional trailer of every cluster rule in main
+export WJ	= 16; # Word joiner; not referenced by any rule in this file
+export R	= 18; # REPHA
+export CS	= 43; # CONS_WITH_STACKER
+export IS	= 44; # INVISIBLE_STACKER
+export Sk	= 48; # SAKOT
+export G	= 49; # HIEROGLYPH
+export J	= 50; # HIEROGLYPH_JOINER
+export SB	= 51; # HIEROGLYPH_SEGMENT_BEGIN
+export SE	= 52; # HIEROGLYPH_SEGMENT_END
+export HVM	= 53; # HALANT_OR_VOWEL_MODIFIER
+
+export FAbv	= 24; # CONS_FINAL_ABOVE
+export FBlw	= 25; # CONS_FINAL_BELOW
+export FPst	= 26; # CONS_FINAL_POST
+export MAbv	= 27; # CONS_MED_ABOVE
+export MBlw	= 28; # CONS_MED_BELOW
+export MPst	= 29; # CONS_MED_POST
+export MPre	= 30; # CONS_MED_PRE
+export CMAbv	= 31; # CONS_MOD_ABOVE
+export CMBlw	= 32; # CONS_MOD_BELOW
+export VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
+export VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
+export VPst	= 35; # VOWEL_POST	UIPC = Right
+export VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
+export VMAbv	= 37; # VOWEL_MOD_ABOVE
+export VMBlw	= 38; # VOWEL_MOD_BELOW
+export VMPst	= 39; # VOWEL_MOD_POST
+export VMPre	= 23; # VOWEL_MOD_PRE
+export SMAbv	= 41; # SYM_MOD_ABOVE
+export SMBlw	= 42; # SYM_MOD_BELOW
+export FMAbv	= 45; # CONS_FINAL_MOD	UIPC = Top
+export FMBlw	= 46; # CONS_FINAL_MOD	UIPC = Bottom
+export FMPst	= 47; # CONS_FINAL_MOD	UIPC = Not_Applicable
+
+h = H | HVM | IS | Sk; # HALANT, HALANT_OR_VOWEL_MODIFIER, INVISIBLE_STACKER or SAKOT
+
+consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*; # modifiers, then repeated (h BASE | CONS_SUB) groups, each with its own modifiers
+medial_consonants = MPre? MAbv? MBlw? MPst?; # at most one of each, in this order
+dependent_vowels = VPre* VAbv* VBlw* VPst* | H; # either a vowel run or a single HALANT
+vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
+final_consonants = FAbv* FBlw* FPst*;
+final_modifiers = FMAbv* FMBlw* | FMPst?; # either an above/below run or at most one FMPst
+
+complex_syllable_start = (R | CS)? (B | GB); # the only rule fragment that requires a base (BASE or BASE_OTHER)
+complex_syllable_middle =
+	consonant_modifiers
+	medial_consonants
+	dependent_vowels
+	vowel_modifiers
+	(Sk B)*
+;
+complex_syllable_tail =
+	complex_syllable_middle
+	final_consonants
+	final_modifiers
+;
+number_joiner_terminated_cluster_tail = (HN N)* HN; # ends on a HALANT_NUM with no BASE_NUM after it
+numeral_cluster_tail = (HN N)+; # one or more HALANT_NUM BASE_NUM pairs
+symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; # never empty: at least one SYM_MOD_ABOVE or SYM_MOD_BELOW
+
+virama_terminated_cluster_tail =
+	consonant_modifiers
+	IS
+;
+virama_terminated_cluster =
+	complex_syllable_start
+	virama_terminated_cluster_tail
+;
+sakot_terminated_cluster_tail =
+	complex_syllable_middle
+	Sk
+;
+sakot_terminated_cluster =
+	complex_syllable_start
+	sakot_terminated_cluster_tail
+;
+standard_cluster =
+	complex_syllable_start
+	complex_syllable_tail
+;
+tail = complex_syllable_tail | sakot_terminated_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail; # shared by symbol_cluster and broken_cluster
+broken_cluster =
+	R?
+	(tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail)
+; # broken_cluster: a tail with no base or BASE_NUM in front of it, optionally led by REPHA
+
+number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
+numeral_cluster = N numeral_cluster_tail?; # a lone BASE_NUM also matches
+symbol_cluster = (O | GB) tail?; # OTHER or BASE_OTHER, optionally followed by any tail
+hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*;
+other = any; # catch-all: exactly one input item of any category
+
+main := |*
+	virama_terminated_cluster ZWNJ?		=> { found_syllable (use_virama_terminated_cluster); };
+	sakot_terminated_cluster ZWNJ?		=> { found_syllable (use_sakot_terminated_cluster); };
+	standard_cluster ZWNJ?			=> { found_syllable (use_standard_cluster); };
+	number_joiner_terminated_cluster ZWNJ?	=> { found_syllable (use_number_joiner_terminated_cluster); };
+	numeral_cluster ZWNJ?			=> { found_syllable (use_numeral_cluster); };
+	symbol_cluster ZWNJ?			=> { found_syllable (use_symbol_cluster); };
+	hieroglyph_cluster ZWNJ?		=> { found_syllable (use_hieroglyph_cluster); };
+	broken_cluster ZWNJ?			=> { found_syllable (use_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; };
+	other					=> { found_syllable (use_non_cluster); }; # fallback so every input item ends up in some syllable
+*|; # scanner block: each rule above swallows one optional trailing ZWNJ; per the Ragel guide the longest match is taken (TODO confirm tie-breaking by rule order)
+
+
+}%%
+
+#define found_syllable(syllable_type) \
+  HB_STMT_START { \
+    if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); /* debug trace, disabled by the constant-false condition */ \
+    for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) /* walks buffer indices from ts up to (not including) te, so slots filtered out of the machine input are tagged too */ \
+      info[i].syllable() = (syllable_serial << 4) | syllable_type; /* serial goes above the low 4 bits, the syllable type into the low 4 bits */ \
+    syllable_serial++; \
+    if (syllable_serial == 16) syllable_serial = 1; /* serial cycles 1..15 and never becomes 0 */ \
+  } HB_STMT_END
+
+
+template <typename Iter>
+struct machine_index_t : /* Iterator adaptor around Iter that adds a null state, assignment from 0, repositioning assignment and comparison by .first. */
+  hb_iter_with_fallback_t<machine_index_t<Iter>,
+			  typename Iter::item_t>
+{
+  machine_index_t (const Iter& it) : it (it) {}
+  machine_index_t (const machine_index_t& o) : hb_iter_with_fallback_t<machine_index_t<Iter>, /* base sub-object is default-constructed rather than copied */
+								       typename Iter::item_t> (),
+					       it (o.it), is_null (o.is_null) {}
+
+  static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
+  static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;
+
+  typename Iter::item_t __item__ () const { return *it; } /* this and the hooks below simply forward to the wrapped iterator */
+  typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
+  unsigned __len__ () const { return it.len (); }
+  void __next__ () { ++it; }
+  void __forward__ (unsigned n) { it += n; }
+  void __prev__ () { --it; }
+  void __rewind__ (unsigned n) { it -= n; }
+
+  void operator = (unsigned n) /* only `= 0` is accepted: it flags the index as null and leaves `it` where it was */
+  {
+    assert (n == 0);
+    is_null = true;
+  }
+  explicit operator bool () { return !is_null; } /* false once the index has been nulled by `= 0` */
+
+  void operator = (const machine_index_t& o) /* adopts o's null flag and moves `it` to o's position */
+  {
+    is_null = o.is_null;
+    unsigned index = (*it).first; /* items are expected to be pairs whose .first is the position number */
+    unsigned n = (*o.it).first;
+    if (index < n) it += n - index; else if (index > n) it -= index - n; /* `it` is stepped to the target; o.it itself is not copy-assigned */
+  }
+  bool operator == (const machine_index_t& o) const
+  { return is_null ? o.is_null : !o.is_null && (*it).first == (*o.it).first; } /* null equals only null; otherwise equal when the .first positions match */
+  bool operator != (const machine_index_t& o) const { return !(*this == o); }
+
+  private:
+  Iter it; /* the wrapped iterator */
+  bool is_null = false; /* set by `= 0`, or taken from the source on copy/assignment */
+};
+struct /* Anonymous function-object type; HB_FUNCOBJ below names its instance machine_index. */
+{
+  template <typename Iter,
+	    hb_requires (hb_is_iterable (Iter))>
+  machine_index_t<hb_iter_type<Iter>>
+  operator () (Iter&& it) const
+  { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); } /* wraps hb_iter (it) in a machine_index_t */
+}
+HB_FUNCOBJ (machine_index);
+
+
+
+static bool
+not_ccs_default_ignorable (const hb_glyph_info_t &i)
+{ return i.use_category() != USE(CGJ); } /* true for every glyph whose USE category is anything other than CGJ */
+
+static inline void
+find_syllables_use (hb_buffer_t *buffer) /* Runs the syllable machine over buffer->info; found_syllable() writes info[i].syllable() for each match. */
+{
+  hb_glyph_info_t *info = buffer->info;
+  auto p =
+    + hb_iter (info, buffer->len)
+    | hb_enumerate /* pair each glyph with its index in the buffer */
+    | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); }, /* drop CGJ glyphs from the machine input */
+		 hb_second)
+    | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p) /* decides which ZWNJ glyphs stay in the input */
+		 {
+		   if (p.second.use_category() == USE(ZWNJ))
+		     for (unsigned i = p.first + 1; i < buffer->len; ++i) /* look ahead for the next non-CGJ glyph */
+		       if (not_ccs_default_ignorable (info[i]))
+			 return !_hb_glyph_info_is_unicode_mark (&info[i]); /* ZWNJ is dropped when that glyph is a Unicode mark */
+		   return true; /* non-ZWNJ glyphs, and a ZWNJ with nothing but CGJ after it, are kept */
+		 })
+    | hb_enumerate /* number the surviving items; machine_index_t compares and repositions by this number */
+    | machine_index
+    ;
+  auto pe = p + p.len (); /* one past the last input item */
+  auto eof = +pe;
+  auto ts = +p; /* ts and te are read by found_syllable() as the start and end of the current match */
+  auto te = +p;
+  unsigned int act HB_UNUSED; /* act and cs are presumably consumed by the Ragel-generated code below -- confirm against the generated header */
+  int cs;
+  %%{
+    write init;
+    getkey (*p).second.second.use_category();
+  }%%
+
+  unsigned int syllable_serial = 1; /* first serial handed out; found_syllable() increments it and wraps it back to 1 */
+  %%{
+    write exec;
+  }%%
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPER_USE_MACHINE_HH */