diff mupdf-source/source/fitz/bidi-std.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/bidi-std.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,1173 @@
+// Extracted from Bidi.cpp - version 26
+
+// Reference implementation for Unicode Bidirectional Algorithm
+
+// Bidi include file
+#include "mupdf/fitz.h"
+#include "bidi-imp.h"
+
+#include <assert.h>
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/*------------------------------------------------------------------------
+	File: Bidi.Cpp
+
+	Description
+	-----------
+
+	Sample Implementation of the Unicode Bidirectional Algorithm as it
+	was revised by Revision 5 of the Unicode Technical Report # 9
+	(1999-8-17)
+
+	Verified for changes to the algorithm up through Unicode 5.2.0 (2009).
+
+	This implementation is organized into several passes, each implemen-
+	ting one or more of the rules of the Unicode Bidi Algorithm. The
+	resolution of Weak Types and of Neutrals each use a state table
+	approach.
+
+	Both a printf based interface and a Windows DlgProc are provided for
+	interactive testing.
+
+	A stress harness comparing this implementation (v24) to a Java based
+	implementation was used by Doug Felt to verify that the two
+	implementations produce identical results for all strings up to six
+	bidi classes and stochastic strings up to length 20.
+
+	Version 26 was verified by the author against the Unicode 5.2.0
+	file BidiTest.txt, which provides an exhaustive text of strings of
+	length 4 or less, but covers some important cases where the language
+	in UAX#9 had been clarified.
+
+	To see this code running in an actual Windows program,
+	download the free Unibook utility from http://unicode.org/unibook
+	The bidi demo is executed from the tools menu. It is build from
+	this source file.
+
+	Build Notes
+	-----------
+
+	To compile the sample implementation please set the #define
+	directives above so the correct headers get included. Not all the
+	files are needed for all purposes. For the commandline version
+	only bidi.h and bidi.cpp are needed.
+
+	The Win32 version is provided as a dialog procedure. To use as a
+	standalone program compile with the lbmain.cpp file. If all you
+	need is the ability to run the code "as is", you can instead download
+	the unibook utility from http://uincode.org/unibook/ which contains
+	the bidi demo compiled from this source file.
+
+	This code uses an extension to C++ that gives variables declared in
+	a for() statement function the same scope as the for() statement.
+	If your compiler does not support this extension, you may need to
+	move the declaration, e.g. int ich = 0; in front of the for statement.
+
+	Implementation Note
+	-------------------
+
+	NOTE: The Unicode Bidirectional Algorithm removes all explicit
+		formatting codes in rule X9, but states that this can be
+		simulated by conformant implementations. This implementation
+		attempts to demonstrate such a simulation
+
+		To demonstrate this, the current implementation does the
+		following:
+
+		in resolveExplicit()
+			- change LRE, LRO, RLE, RLO, PDF to BN
+			- assign nested levels to BN
+
+		in resolveWeak and resolveNeutrals
+			- assign L and R to BN's where they exist in place of
+			  sor and eor by changing the last BN in front of a
+			  level change to a strong type
+			- skip over BN's for the purpose of determining actions
+			- include BN in the count of deferred runs
+				which will resolve some of them to EN, AN and N
+
+		in resolveWhiteSpace
+			- set the level of any surviving BN to the base level,
+				or the level of the preceding character
+			- include LRE,LRO, RLE, RLO, PDF and BN in the count
+			   whitespace to be reset
+
+		This will result in the same order for non-BN characters as
+		if the BN characters had been removed.
+
+		The clean() function can be used to remove boundary marks for
+		verification purposes.
+
+	Notation
+	--------
+	Pointer variables generally start with the letter p
+	Counter variables generally start with the letter c
+	Index variables generally start with the letter i
+	Boolean variables generally start with the letter f
+
+	The enumerated bidirectional types have the same name as in the
+	description for the Unicode Bidirectional Algorithm
+
+	Using this code outside a demo context
+	--------------------------------------
+
+	The way the functions are broken down in this demo code is based
+	on the needs of the demo to show the evolution in internal state
+	as the algorithm proceeds. This obscures how the algorithm would
+	be used in practice. These are the steps:
+
+	1. Allocate a pair of arrays large enough to hold bidi class
+	   and calculated levels (one for each input character)
+
+	2. Provide your own function to assign directional types (bidi
+	   classes) corresponding to each character in the input,
+	   conflating ON, WS, S to N. Unlike the classify function in this
+	   demo, the input would be actual Unicode characters.
+
+	3. Process the first paragraph by calling BidiParagraph. That
+	   function changes B into BN and returns a length including the
+	   paragraph separator. (The iteration over multiple paragraphs
+	   is left as exercise for the reader).
+
+	4. Assign directional types again, but now assign specific types
+	   to whitespace characters.
+
+	5. Instead of reordering the input in place it is often desirable
+	   to calculate an array of offsets indicating the reordering.
+	   To that end, allocate such an array here and use it instead
+	   of the input array in the next step.
+
+	6. Resolve and reorder the lines by calling BidiLines. That
+	   function 'breaks' lines on LS characters. Provide an optional
+	   array of flags indicating the location of other line breaks as
+	   needed.
+
+	Update History
+	--------------
+	Version 24 is the initial published and verified version of this
+	reference implementation. Version 25 and its updates fix various
+	minor issues with the scaffolding used for demonstrating the
+	algorithm using pseudo-alphabets from the command line or dialog
+	box. No changes to the implementation of the actual bidi algorithm
+	are made in any of the minor updates to version 25. Version 26
+	also makes no change to the actual algorithm but was verified
+	against the official BidiTest.txt file for Unicode 5.2.0.
+
+	- updated pseudo-alphabet
+
+	- Last Revised 12-10-99 (25)
+
+	- enable demo mode for release builds - no other changes
+
+	- Last Revised 12-10-00 (25a)
+
+	- fix regression in pseudo alphabet use for Windows UI
+
+	- Last Revised 02-01-01 (25b)
+
+	- fixed a few comments, renamed a variable
+
+	- Last Revised 03-04-01 (25c)
+
+	- make base level settable, enable mirror by default,
+	fix dialog size
+
+	- Last Revised 06-02-01 (25e)
+
+	- fixed some comments
+
+	- Last Revised 09-29-01 (25f)
+
+	- fixed classification for LS,RLM,LRM in pseudo alphabet,
+	focus issues in UI, regression fix to commandline from 25(e)
+	fix DEMO switch
+
+	- Last Revised 11-07-01 (25g)
+
+	- fixed classification for plus/minus in pseudo alphabet
+	to track changes made in Unicode 4.0.1
+
+	- Last Revised 12-03-04 (25h)
+
+	- now compiles as dialog-only program for WINDOWS_UI==1
+	using new bidimain.cpp
+
+	- Last Revised 12-02-07 (25i)
+
+	- cleaned up whitespace and indenting in the source,
+	fixed two comments (table headers)
+
+	- Last Revised 15-03-07 (25j)
+
+	- named enumerations
+
+	- Last Revised 30-05-07 (25k)
+
+	- added usage notes, minor edits to comments, indentation, etc
+	throughout. Added the bidiParagraph function. Checked against
+	changes in the Unicode Bidi Algorithm for Unicode 5.2.0. No
+	changes needed to this implementation to match the values in
+	the BidiTest.txt file in the Unicode Character Database.
+	Minor fixes to dialog/windows proc, updated preprocessor directives.
+
+	- Last Revised 03-08-09 (26)
+
+	Credits:
+	-------
+	Written by: Asmus Freytag
+	Command line interface by: Rick McGowan
+	Verification (v24): Doug Felt
+
+	Disclaimer and legal rights:
+	---------------------------
+	Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved.
+	Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+	THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+	IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+	BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+	OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+	WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+	ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+
+	The file bid.rc is included in the software covered by the above.
+------------------------------------------------------------------------*/
+
+/* === HELPER FUNCTIONS AND DECLARATIONS ================================= */
+
+#define odd(x) ((x) & 1)
+
+/*----------------------------------------------------------------------
+	The following array maps character codes to types for the purpose of
+	this sample implementation. The legend string gives a human readable
+	explanation of the pseudo alphabet.
+
+	For simplicity, characters entered by buttons are given a 1:1 mapping
+	between their type and pseudo character value. Pseudo characters that
+	can be typed from the keyboard are explained in the legend string.
+
+	Use the Unicode Character Database for the real values in real use.
+ ---------------------------------------------------------------------*/
+
+enum
+{
+	chLS = 0x15
+};
+
+#if 0
+static const fz_bidi_chartype types_from_char[] =
+{
+// 0	   1	   2	   3	   4	   5	   6	   7	   8	   9	   a	   b	   c	   d	   e	   f
+BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_L,  BDI_R,  BDI_BN, BDI_BN, BDI_BN, BDI_S,  BDI_B,  BDI_S,  BDI_WS, BDI_B,  BDI_BN, BDI_BN, /*00-0f*/
+BDI_LRO,BDI_LRE,BDI_PDF,BDI_RLO,BDI_RLE,BDI_WS, BDI_L,  BDI_R,  BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_B,  BDI_B,  BDI_B,  BDI_S,  /*10-1f*/
+BDI_WS, BDI_ON, BDI_ON, BDI_ET, BDI_ET, BDI_ET, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ES, BDI_CS, BDI_ES, BDI_CS, BDI_ES, /*20-2f*/
+BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_AN, BDI_AN, BDI_AN, BDI_AN, BDI_CS, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, /*30-3f*/
+BDI_ON, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  /*40-4f*/
+BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_R,  BDI_LRE,BDI_ON, BDI_RLE,BDI_PDF,BDI_S,  /*50-5f*/
+BDI_NSM,BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  /*60-6f*/
+BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_L,  BDI_LRO,BDI_B,  BDI_RLO,BDI_BN, BDI_ON, /*70-7f*/
+};
+#endif
+
+/***************************************
+	Reverse, human readable reference:
+
+	LRM:	0x4
+	RLM:	0x5
+	  L:	0x16,a-z
+	LRE:	0x11,[
+	LRO:	0x10,{
+	  R:	0x17,G-Z
+	 AL:	A-F
+	RLE:	0x14,]
+	RLO:	0x13,}
+	PDF:	0x12,^
+	 EN:	0-5
+	 ES:	/,+,[hyphen]
+	 ET:	#,$,%
+	 AN:	6-9
+	 CS:	[comma],.,:
+	NSM:	`
+	 BN:	0x0-0x8,0xe,0xf,0x18-0x1b,~
+	  B:	0xa,0xd,0x1c-0x1e,|
+	  S:	0x9,0xb,0x1f,_
+	 WS:	0xc,0x15,[space]
+	 ON:	!,",&,',(,),*,;,<,=,>,?,@,\,0x7f
+****************************************/
+
+// === HELPER FUNCTIONS ================================================
+
+#ifdef BIDI_LINE_AT_A_TIME
+// reverse cch characters
+static
+void reverse(uint32_t *psz, int cch)
+{
+	uint32_t ch_temp;
+	int ich;
+
+	for (ich = 0; ich < --cch; ich++)
+	{
+		ch_temp = psz[ich];
+		psz[ich] = psz[cch];
+		psz[cch] = ch_temp;
+	}
+}
+#endif
+
+// Set a run of cval values at locations all prior to, but not including
+// iStart, to the new value nval.
+static
+void set_deferred_run(fz_bidi_chartype *pval, size_t cval, size_t iStart, fz_bidi_chartype nval)
+{
+	size_t i;
+
+	for (i = iStart; i > iStart - cval; )
+	{
+		pval[--i] = nval;
+	}
+}
+
+static
+void set_deferred_level_run(fz_bidi_level *pval, size_t cval, size_t iStart, fz_bidi_level nval)
+{
+	size_t i;
+
+	for (i = iStart; i > iStart - cval; )
+	{
+		pval[--i] = nval;
+	}
+}
+
+// === ASSIGNING BIDI CLASSES ============================================
+
+// === THE PARAGRAPH LEVEL ===============================================
+
+/*------------------------------------------------------------------------
+	Function: resolve_paragraphs
+
+	Resolves the input strings into blocks over which the algorithm
+	is then applied.
+
+	Implements Rule P1 of the Unicode Bidi Algorithm
+
+	Input: Text string
+		   Character count
+
+	Output: revised character count
+
+	Note:	This is a very simplistic function. In effect it restricts
+			the action of the algorithm to the first paragraph in the input
+			where a paragraph ends at the end of the first block separator
+			or at the end of the input text.
+
+------------------------------------------------------------------------*/
+size_t fz_bidi_resolve_paragraphs(fz_bidi_chartype *types, size_t cch)
+{
+	size_t ich;
+
+	// skip characters not of type B
+	for(ich = 0; ich < cch && types[ich] != BDI_B; ich++)
+		;
+	// stop after first B, make it a BN for use in the next steps
+	if (ich < cch && types[ich] == BDI_B)
+		types[ich++] = BDI_BN;
+	return ich;
+}
+
+#if 0
+/*------------------------------------------------------------------------
+	Function: base_level
+
+	Determines the base level
+
+	Implements rule P2 of the Unicode Bidi Algorithm.
+
+	Input: Array of directional classes
+		   Character count
+
+	Note: Ignores explicit embeddings
+------------------------------------------------------------------------*/
+static int base_level(const fz_bidi_chartype *pcls, int cch)
+{
+	int ich;
+
+	for (ich = 0; ich < cch; ich++)
+	{
+		switch (pcls[ich])
+		{
+		// strong left
+		case BDI_L:
+			return 0;
+
+		// strong right
+		case BDI_R:
+		case BDI_AL:
+			return 1;
+		}
+	}
+	return 0;
+}
+#endif
+
+//====== RESOLVE EXPLICIT ================================================
+
+static fz_bidi_level greater_even(fz_bidi_level i)
+{
+	return odd(i) ? i + 1 : i + 2;
+}
+
+static fz_bidi_level greater_odd(fz_bidi_level i)
+{
+	return odd(i) ? i + 2 : i + 1;
+}
+
+static fz_bidi_chartype embedding_direction(fz_bidi_chartype level)
+{
+	return odd(level) ? BDI_R : BDI_L;
+}
+
+/*------------------------------------------------------------------------
+	Function: resolveExplicit
+
+	Recursively resolves explicit embedding levels and overrides.
+	Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
+
+	Input: Base embedding level and direction
+		   Character count
+
+	Output: Array of embedding levels
+		  Caller must allocate (one level per input character)
+
+	In/Out: Array of direction classes
+
+	Note: The function uses two simple counters to keep track of
+		  matching explicit codes and PDF. Use the default argument for
+		  the outermost call. The nesting counter counts the recursion
+		  depth and not the embedding level.
+------------------------------------------------------------------------*/
+size_t fz_bidi_resolve_explicit(fz_bidi_level level, fz_bidi_chartype dir, fz_bidi_chartype *pcls, fz_bidi_level *plevel, size_t cch,
+				fz_bidi_level n_nest)
+{
+	size_t ich;
+
+	// always called with a valid nesting level
+	// nesting levels are != embedding levels
+	int nLastValid = n_nest;
+
+	// check input values
+	assert(n_nest >= 0 && level >= 0 && level <= BIDI_LEVEL_MAX);
+
+	// process the text
+	for (ich = 0; ich < cch; ich++)
+	{
+		fz_bidi_chartype cls = pcls[ich];
+		switch (cls)
+		{
+		case BDI_LRO:
+		case BDI_LRE:
+			n_nest++;
+			if (greater_even(level) <= BIDI_LEVEL_MAX)
+			{
+				plevel[ich] = greater_even(level);
+				pcls[ich] = BDI_BN;
+				ich += fz_bidi_resolve_explicit(plevel[ich], (cls == BDI_LRE ? BDI_N : BDI_L),
+							&pcls[ich+1], &plevel[ich+1],
+							 cch - (ich+1), n_nest);
+				n_nest--;
+				continue;
+			}
+			cls = pcls[ich] = BDI_BN;
+			break;
+
+		case BDI_RLO:
+		case BDI_RLE:
+			n_nest++;
+			if (greater_odd(level) <= BIDI_LEVEL_MAX)
+			{
+				plevel[ich] = greater_odd(level);
+				pcls[ich] = BDI_BN;
+				ich += fz_bidi_resolve_explicit(plevel[ich], (cls == BDI_RLE ? BDI_N : BDI_R),
+								&pcls[ich+1], &plevel[ich+1],
+								 cch - (ich+1), n_nest);
+				n_nest--;
+				continue;
+			}
+			cls = pcls[ich] = BDI_BN;
+			break;
+
+		case BDI_PDF:
+			cls = pcls[ich] = BDI_BN;
+			if (n_nest)
+			{
+				if (nLastValid < n_nest)
+				{
+					n_nest--;
+				}
+				else
+				{
+					cch = ich; // break the loop, but complete body
+				}
+			}
+			break;
+		}
+
+		// Apply the override
+		if (dir != BDI_N)
+		{
+			cls = dir;
+		}
+		plevel[ich] = level;
+		if (pcls[ich] != BDI_BN)
+			pcls[ich] = cls;
+	}
+
+	return ich;
+}
+
+// === RESOLVE WEAK TYPES ================================================
+
+enum bidi_state // possible states
+{
+	xa,	//	arabic letter
+	xr,	//	right letter
+	xl,	//	left letter
+
+	ao,	//	arabic lett. foll by ON
+	ro,	//	right lett. foll by ON
+	lo,	//	left lett. foll by ON
+
+	rt,	//	ET following R
+	lt,	//	ET following L
+
+	cn,	//	EN, AN following AL
+	ra,	//	arabic number foll R
+	re,	//	european number foll R
+	la,	//	arabic number foll L
+	le,	//	european number foll L
+
+	ac,	//	CS following cn
+	rc,	//	CS following ra
+	rs,	//	CS,ES following re
+	lc,	//	CS following la
+	ls,	//	CS,ES following le
+
+	ret,	//	ET following re
+	let	//	ET following le
+} ;
+
+static const unsigned char state_weak[][10] =
+{
+	//	N,  L,  R,  AN, EN, AL,NSM, CS, ES, ET,
+/*xa*/  { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter		  */
+/*xr*/  { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter		   */
+/*xl*/  { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter			  */
+
+/*ao*/  { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/
+/*ro*/  { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
+/*lo*/  { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON  */
+
+/*rt*/  { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R		  */
+/*lt*/  { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L		  */
+
+/*cn*/  { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL	  */
+/*ra*/  { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R   */
+/*re*/  { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */
+/*la*/  { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L   */
+/*le*/  { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */
+
+/*ac*/  { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn		  */
+/*rc*/  { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra		  */
+/*rs*/  { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re	  */
+/*lc*/  { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la		  */
+/*ls*/  { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le	  */
+
+/*ret*/ { ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re		  */
+/*let*/ { lo, xl, xr, la, le, xa,let, lo, lo,let }  /* ET following le		  */
+
+};
+
+enum bidi_action // possible actions
+{
+	// primitives
+	IX = 0x100,				  // increment
+	XX = 0xF,					// no-op
+
+	// actions
+	xxx = (XX << 4) + XX,		// no-op
+	xIx = IX + xxx,			// increment run
+	xxN = (XX << 4) + BDI_ON,	// set current to N
+	xxE = (XX << 4) + BDI_EN,	// set current to EN
+	xxA = (XX << 4) + BDI_AN,	// set current to AN
+	xxR = (XX << 4) + BDI_R,	// set current to R
+	xxL = (XX << 4) + BDI_L,	// set current to L
+	Nxx = (BDI_ON << 4) + 0xF,	// set run to neutral
+	Axx = (BDI_AN << 4) + 0xF,	// set run to AN
+	ExE = (BDI_EN << 4) + BDI_EN,	// set run to EN, set current to EN
+	NIx = (BDI_ON << 4) + 0xF + IX,	// set run to N, increment
+	NxN = (BDI_ON << 4) + BDI_ON,	// set run to N, set current to N
+	NxR = (BDI_ON << 4) + BDI_R,	// set run to N, set current to R
+	NxE = (BDI_ON << 4) + BDI_EN,	// set run to N, set current to EN
+
+	AxA = (BDI_AN << 4) + BDI_AN,	// set run to AN, set current to AN
+	NxL = (BDI_ON << 4) + BDI_L,	// set run to N, set current to L
+	LxL = (BDI_L << 4) + BDI_L	// set run to L, set current to L
+};
+
+typedef uint16_t fz_bidi_action;
+
+static const fz_bidi_action action_weak[][10] =
+{
+	//   N,.. L,   R,  AN,  EN,  AL, NSM,  CS,..ES,  ET,
+/*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter			*/
+/*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter			 */
+/*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter			 */
+
+/*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON	*/
+/*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON	*/
+/*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON	*/
+
+/*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R			*/
+/*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L			*/
+
+/*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following  AL	*/
+/*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R	*/
+/*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R	*/
+/*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L	*/
+/*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L	*/
+
+/*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn		 */
+/*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra		 */
+/*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re		*/
+/*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la		 */
+/*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le		*/
+
+/*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re			*/
+/*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }  /* ET following le			*/
+};
+
+static
+fz_bidi_chartype get_deferred_type(fz_bidi_action action)
+{
+	return (action >> 4) & 0xF;
+}
+
+static
+fz_bidi_chartype get_resolved_type(fz_bidi_action action)
+{
+	return action & 0xF;
+}
+
+/* Note on action table:
+
+	States can be of two kinds:
+	 - Immediate Resolution State, where each input token
+	   is resolved as soon as it is seen. These states have
+	   only single action codes (xxN) or the no-op (xxx)
+	   for static input tokens.
+	 - Deferred Resolution State, where input tokens either
+	   either extend the run (xIx) or resolve its Type (e.g. Nxx).
+
+	Input classes are of three kinds
+	 - Static Input Token, where the class of the token remains
+	   unchanged on output (AN, L, N, R)
+	 - Replaced Input Token, where the class of the token is
+	   always replaced on output (AL, BDI_BN, NSM, CS, ES, ET)
+	 - Conditional Input Token, where the class of the token is
+	   changed on output in some but not all cases (EN)
+
+	 Where tokens are subject to change, a double action
+	 (e.g. NxA, or NxN) is _required_ after deferred states,
+	 resolving both the deferred state and changing the current token.
+
+	These properties of the table are verified by assertions below.
+	This code is needed only during debugging and maintenance
+*/
+
+/*------------------------------------------------------------------------
+	Function: resolveWeak
+
+	Resolves the directionality of numeric and other weak character types
+
+	Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
+
+	Input: Array of embedding levels
+		   Character count
+
+	In/Out: Array of directional classes
+
+	Note: On input only these directional classes are expected
+		  AL, HL, R, L,  ON, BDI_BN, NSM, AN, EN, ES, ET, CS,
+------------------------------------------------------------------------*/
+void fz_bidi_resolve_weak(fz_context *ctx, fz_bidi_level baselevel, fz_bidi_chartype *pcls, fz_bidi_level *plevel, size_t cch)
+{
+	int state = odd(baselevel) ? xr : xl;
+	fz_bidi_chartype cls;
+	size_t ich;
+	fz_bidi_action action;
+	fz_bidi_chartype cls_run;
+	fz_bidi_chartype cls_new;
+
+	fz_bidi_level level = baselevel;
+
+	size_t cch_run = 0;
+
+	for (ich = 0; ich < cch; ich++)
+	{
+		if (pcls[ich] > BDI_BN) {
+			fz_warn(ctx, "error: pcls[%zu] > BN (%d)\n", ich, pcls[ich]);
+		}
+
+		// ignore boundary neutrals
+		if (pcls[ich] == BDI_BN)
+		{
+			// must flatten levels unless at a level change;
+			plevel[ich] = level;
+
+			// lookahead for level changes
+			if (ich + 1 == cch && level != baselevel)
+			{
+				// have to fixup last BN before end of the loop, since
+				// its fix-upped value will be needed below the assert
+				pcls[ich] = embedding_direction(level);
+			}
+			else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BDI_BN)
+			{
+				// fixup LAST BN in front / after a level run to make
+				// it act like the SOR/EOR in rule X10
+				int newlevel = plevel[ich+1];
+				if (level > newlevel) {
+					newlevel = level;
+				}
+				plevel[ich] = newlevel;
+
+				// must match assigned level
+				pcls[ich] = embedding_direction(newlevel);
+				level = plevel[ich+1];
+			}
+			else
+			{
+				// don't interrupt runs
+				if (cch_run)
+				{
+					cch_run++;
+				}
+				continue;
+			}
+		}
+
+		assert(pcls[ich] <= BDI_BN);
+		cls = pcls[ich];
+
+		action = action_weak[state][cls];
+
+		// resolve the directionality for deferred runs
+		cls_run = get_deferred_type(action);
+		if (cls_run != XX)
+		{
+			set_deferred_run(pcls, cch_run, ich, cls_run);
+			cch_run = 0;
+		}
+
+		// resolve the directionality class at the current location
+		cls_new = get_resolved_type(action);
+		if (cls_new != XX)
+			pcls[ich] = cls_new;
+
+		// increment a deferred run
+		if (IX & action)
+			cch_run++;
+
+		state = state_weak[state][cls];
+	}
+
+	// resolve any deferred runs
+	// use the direction of the current level to emulate PDF
+	cls = embedding_direction(level);
+
+	// resolve the directionality for deferred runs
+	cls_run = get_deferred_type(action_weak[state][cls]);
+	if (cls_run != XX)
+		set_deferred_run(pcls, cch_run, ich, cls_run);
+}
+
+// === RESOLVE NEUTRAL TYPES ==============================================
+
+// action values
+enum neutral_action
+{
+	// action to resolve previous input
+	nL = BDI_L,		// resolve EN to L
+	En = 3 << 4,		// resolve neutrals run to embedding level direction
+	Rn = BDI_R << 4,	// resolve neutrals run to strong right
+	Ln = BDI_L << 4,	// resolved neutrals run to strong left
+	In = (1<<8),		// increment count of deferred neutrals
+	LnL = (1<<4)+BDI_L	// set run and EN to L
+};
+
+static fz_bidi_chartype
+get_deferred_neutrals(fz_bidi_action action, fz_bidi_level level)
+{
+	action = (action >> 4) & 0xF;
+	if (action == (En >> 4))
+		return embedding_direction(level);
+	else
+		return action;
+}
+
+static fz_bidi_chartype get_resolved_neutrals(fz_bidi_action action)
+{
+	action = action & 0xF;
+
+	/* RJW: The original code does:
+	 * if (action == In)
+	 *     return 0;
+	 * else
+	 *     return action;
+	 * BUT, this can never be true, as In == 256.
+	 * This upsets coverity. We fix this here, but include this note
+	 * so that we understand what changed in case we ever update to
+	 * a newer release of the bidirectional code.
+	 */
+	return action;
+}
+
+// state values
+enum neutral_state
+{
+	// new temporary class
+	r,	// R and characters resolved to R
+	l,	// L and characters resolved to L
+	rn,	// N preceded by right
+	ln,	// N preceded by left
+	a,	// AN preceded by left (the abbrev 'la' is used up above)
+	na	// N preceded by a
+} ;
+
+/*------------------------------------------------------------------------
+	Notes:
+
+	By rule W7, whenever a EN is 'dominated' by an L (including start of
+	run with embedding direction = L) it is resolved to, and further treated
+	as L.
+
+	This leads to the need for 'a' and 'na' states.
+------------------------------------------------------------------------*/
+
+static const int action_neutrals[][5] =
+{
+//	N,	L,	R, AN, EN, = cls
+					// state =
+	{In,  0,  0,  0,  0},		// r	right
+	{In,  0,  0,  0,  BDI_L},	// l	left
+
+	{In, En, Rn, Rn, Rn},		// rn	N preceded by right
+	{In, Ln, En, En, LnL},		// ln	N preceded by left
+
+	{In,  0,  0,  0,  BDI_L},	// a   AN preceded by left
+	{In, En, Rn, Rn, En}		// na	N  preceded by a
+} ;
+
+static const int state_neutrals[][5] =
+{
+//	 N, L,	R,	AN, EN = cls
+					// state =
+	{rn, l,	r,	r,	r},	// r   right
+	{ln, l,	r,	a,	l},	// l   left
+
+	{rn, l,	r,	r,	r},	// rn  N preceded by right
+	{ln, l,	r,	a,	l},	// ln  N preceded by left
+
+	{na, l,	r,	a,	l},	// a  AN preceded by left
+	{na, l,	r,	a,	l}	// na  N preceded by la
+} ;
+
+/*------------------------------------------------------------------------
+	Function: resolveNeutrals
+
+	Resolves the directionality of neutral character types.
+
+	Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
+
+	Input: Array of embedding levels
+		   Character count
+		   Baselevel
+
+	In/Out: Array of directional classes
+
+	Note: On input only these directional classes are expected
+		  R,  L,  N, AN, EN and BN
+
+		  W8 resolves a number of ENs to L
+------------------------------------------------------------------------*/
+void fz_bidi_resolve_neutrals(fz_bidi_level baselevel, fz_bidi_chartype *pcls, const fz_bidi_level *plevel, size_t cch)
+{
+	// the state at the start of text depends on the base level
+	int state = odd(baselevel) ? r : l;
+	fz_bidi_chartype cls;
+	size_t ich;
+	fz_bidi_chartype cls_run;
+
+	size_t cch_run = 0;
+	fz_bidi_level level = baselevel;
+
+	for (ich = 0; ich < cch; ich++)
+	{
+		int action;
+		fz_bidi_chartype cls_new;
+
+		// ignore boundary neutrals
+		if (pcls[ich] == BDI_BN)
+		{
+			// include in the count for a deferred run
+			if (cch_run)
+				cch_run++;
+
+			// skip any further processing
+			continue;
+		}
+
+		assert(pcls[ich] < 5); // "Only N, L, R, AN, EN are allowed"
+		cls = pcls[ich];
+
+		action = action_neutrals[state][cls];
+
+		// resolve the directionality for deferred runs
+		cls_run = get_deferred_neutrals(action, level);
+		if (cls_run != BDI_N)
+		{
+			set_deferred_run(pcls, cch_run, ich, cls_run);
+			cch_run = 0;
+		}
+
+		// resolve the directionality class at the current location
+		cls_new = get_resolved_neutrals(action);
+		if (cls_new != BDI_N)
+			pcls[ich] = cls_new;
+
+		if (In & action)
+			cch_run++;
+
+		state = state_neutrals[state][cls];
+		level = plevel[ich];
+	}
+
+	// resolve any deferred runs
+	cls = embedding_direction(level);	// eor has type of current level
+
+	// resolve the directionality for deferred runs
+	cls_run = get_deferred_neutrals(action_neutrals[state][cls], level);
+	if (cls_run != BDI_N)
+		set_deferred_run(pcls, cch_run, ich, cls_run);
+}
+
+// === RESOLVE IMPLICITLY =================================================
+
+/*------------------------------------------------------------------------
+	Function: resolveImplicit
+
+	Recursively resolves implicit embedding levels.
+	Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
+
+	Input: Array of direction classes
+		   Character count
+		   Base level
+
+	In/Out: Array of embedding levels
+
+	Note: levels may exceed 15 on output.
+		  Accepted subset of direction classes
+		  R, L, AN, EN
+------------------------------------------------------------------------*/
+static const fz_bidi_level add_level[][4] =
+{
+	// L,  R,	AN, EN = cls
+					// level =
+/* even */ { 0,	1,	2,	2 },	// EVEN
+/* odd	*/ { 1,	0,	1,	1 }	// ODD
+
+};
+
+void fz_bidi_resolve_implicit(const fz_bidi_chartype *pcls, fz_bidi_level *plevel, size_t cch)
+{
+	size_t ich;
+
+	for (ich = 0; ich < cch; ich++)
+	{
+		// cannot resolve bn here, since some bn were resolved to strong
+		// types in resolveWeak. To remove these we need the original
+		// types, which are available again in resolveWhiteSpace
+		if (pcls[ich] == BDI_BN)
+		{
+			continue;
+		}
+		assert(pcls[ich] > 0); // "No Neutrals allowed to survive here."
+		assert(pcls[ich] < 5); // "Out of range."
+		plevel[ich] += add_level[odd(plevel[ich])][pcls[ich] - 1];
+	}
+}
+
+#if 0
+// === REORDER ===========================================================
+/*------------------------------------------------------------------------
+	Function: resolve_lines
+
+	Breaks a paragraph into lines
+
+	Input:	Character count
+			Array of line break flags
+	In/Out:	Array of characters
+
+	Returns the count of characters on the first line
+
+	Note: This function only breaks lines at hard line breaks. Other
+	line breaks can be passed in. If pbrk[n] is true, then a break
+	occurs after the character in psz_input[n]. Breaks before the first
+	character are not allowed.
+------------------------------------------------------------------------*/
+static int resolve_lines(uint32_t *psz_input, int *pbrk, int cch)
+{
+	int ich;
+
+	// skip characters not of type LS
+	for(ich = 0; ich < cch; ich++)
+	{
+		if (psz_input[ich] == chLS || (pbrk && pbrk[ich]))
+		{
+			ich++;
+			break;
+		}
+	}
+
+	return ich;
+}
+#endif
+
+/*------------------------------------------------------------------------
+	Function: fz_bidi_resolve_whitespace
+
+	Resolves levels for WS and S
+	Implements rule L1 of the Unicode bidi Algorithm.
+
+	Input:	Base embedding level
+			Character count
+			Array of direction classes (for one line of text)
+
+	In/Out: Array of embedding levels (for one line of text)
+
+	Note: this should be applied a line at a time. The default driver
+		  code supplied in this file assumes a single line of text; for
+		  a real implementation, cch and the initial pointer values
+		  would have to be adjusted.
+------------------------------------------------------------------------*/
+void fz_bidi_resolve_whitespace(fz_bidi_level baselevel, const fz_bidi_chartype *pcls, fz_bidi_level *plevel,
+				size_t cch)
+{
+	size_t cchrun = 0;
+	fz_bidi_level oldlevel = baselevel;
+	size_t ich;
+
+	for (ich = 0; ich < cch; ich++)
+	{
+		switch(pcls[ich])
+		{
+		default:
+			cchrun = 0; // any other character breaks the run
+			break;
+		case BDI_WS:
+			cchrun++;
+			break;
+
+		case BDI_RLE:
+		case BDI_LRE:
+		case BDI_LRO:
+		case BDI_RLO:
+		case BDI_PDF:
+		case BDI_BN:
+			plevel[ich] = oldlevel;
+			cchrun++;
+			break;
+
+		case BDI_S:
+		case BDI_B:
+			// reset levels for WS before eot
+			set_deferred_level_run(plevel, cchrun, ich, baselevel);
+			cchrun = 0;
+			plevel[ich] = baselevel;
+			break;
+		}
+		oldlevel = plevel[ich];
+	}
+	// reset level before eot
+	set_deferred_level_run(plevel, cchrun, ich, baselevel);
+}
+
+#ifdef BIDI_LINE_AT_A_TIME
+/*------------------------------------------------------------------------
+	Functions: reorder/reorderLevel
+
+	Recursively reorders the display string
+	"From the highest level down, reverse all characters at that level and
+	higher, down to the lowest odd level"
+
+	Implements rule L2 of the Unicode bidi Algorithm.
+
+	Input: Array of embedding levels
+		   Character count
+		   Flag enabling reversal (set to false by initial caller)
+
+	In/Out: Text to reorder
+
+	Note: levels may exceed 15 resp. 61 on input.
+
+	Rule L3 - reorder combining marks is not implemented here
+	Rule L4 - glyph mirroring is implemented as a display option below
+
+	Note: this should be applied a line at a time
+-------------------------------------------------------------------------*/
+static int reorderLevel(fz_bidi_level level, uint32_t *psz_text, const fz_bidi_level *plevel, int cch,
+				 int f_reverse)
+{
+	int ich;
+
+	// true as soon as first odd level encountered
+	f_reverse = f_reverse || odd(level);
+
+	for (ich = 0; ich < cch; ich++)
+	{
+		if (plevel[ich] < level)
+		{
+			break;
+		}
+		else if (plevel[ich] > level)
+		{
+			ich += reorderLevel(level + 1, psz_text + ich, plevel + ich,
+				cch - ich, f_reverse) - 1;
+		}
+	}
+	if (f_reverse)
+	{
+		reverse(psz_text, ich);
+	}
+	return ich;
+}
+
+int Bidi_reorder(fz_bidi_level baselevel, uint32_t *psz_text, const fz_bidi_level *plevel, int cch)
+{
+	int ich = 0;
+
+	while (ich < cch)
+	{
+		ich += reorderLevel(baselevel, psz_text + ich, plevel + ich,
+			cch - ich, FALSE);
+	}
+	return ich;
+}
+#endif