diff mupdf-source/thirdparty/leptonica/src/parseprotos.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/parseprotos.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,954 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*
+ * \file  parseprotos.c
+ * <pre>
+ *
+ *       char             *parseForProtos()
+ *
+ *    Static helpers
+ *       static l_int32    getNextNonCommentLine()
+ *       static l_int32    getNextNonBlankLine()
+ *       static l_int32    getNextNonDoubleSlashLine()
+ *       static l_int32    searchForProtoSignature()
+ *       static char      *captureProtoSignature()
+ *       static char      *cleanProtoSignature()
+ *       static l_int32    skipToEndOfFunction()
+ *       static l_int32    skipToMatchingBrace()
+ *       static l_int32    skipToSemicolon()
+ *       static l_int32    getOffsetForCharacter()
+ *       static l_int32    getOffsetForMatchingRP()
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include "allheaders.h"
+
+#define L_BUF_SIZE 2048    /* max token size */
+
+static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
+static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
+static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
+            l_int32 *pnext);
+static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
+            l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
+            l_int32 *pfound);
+static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
+            l_int32 charindex);
+static char * cleanProtoSignature(char *str);
+static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
+            l_int32 charindex, l_int32 *pnext);
+static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
+            l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
+static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
+            l_int32 charindex, l_int32 *pnext);
+static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
+            l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
+static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
+            l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
+            l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
+
+
+/*
+ * \brief  parseForProtos()
+ *
+ * \param[in]   filein      output of cpp
+ * \param[in]   prestring   [optional] string that prefaces each decl;
+ *                          use NULL to omit
+ * \return   parsestr  string of function prototypes, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) We parse the output of cpp:
+ *              cpp -ansi <filein>
+ *          Three plans were attempted, with success on the third.
+ *      (2) Plan 1.  A cursory examination of the cpp output indicated that
+ *          every function was preceded by a cpp comment statement.
+ *          So we just need to look at statements beginning after comments.
+ *          Unfortunately, this is NOT the case.  Some functions start
+ *          without cpp comment lines, typically when there are no
+ *          comments in the source that immediately precede the function.
+ *      (3) Plan 2.  Consider the keywords in the language that start
+ *          parts of the cpp file.  Some, like 'enum', 'union' and
+ *          'struct', are followed after a while by '{', and eventually
+ *          end with '}, plus an optional token and a final ';'.
+ *          Others, like 'extern', 'static' and 'typedef', are never
+ *          the beginnings of global function definitions.   Function
+ *          prototypes have one or more sets of '(' followed eventually
+ *          by a ')', and end with ';'.  But function definitions have
+ *          tokens, followed by '(', more tokens, ')' and then
+ *          immediately a '{'.  We would generate a prototype from this
+ *          by adding a ';' to all tokens up to the ')'.  So we use
+ *          these special tokens to decide what we are parsing.  And
+ *          whenever a function definition is found and the prototype
+ *          extracted, we skip through the rest of the function
+ *          past the corresponding '}'.  This token ends a line, and
+ *          is often on a line of its own.  But as it turns out,
+ *          the only keyword we need to consider is 'static'.
+ *      (4) Plan 3.  Consider the parentheses and braces for various
+ *          declarations.  A struct, enum, or union has a pair of
+ *          braces followed by a semicolon.  With the exception of an
+ *          __attribute__ declaration for a struct, they cannot have parentheses
+ *          before the left brace, but a struct can have lots of parentheses
+ *          within the brace set.  A function prototype has no braces.
+ *          A function declaration can have sets of left and right
+ *          parentheses, but these are followed by a left brace.
+ *          So plan 3 looks at the way parentheses and braces are
+ *          organized.  Once the beginning of a function definition
+ *          is found, the prototype is extracted and we search for
+ *          the ending right brace.
+ *      (5) To find the ending right brace, it is necessary to do some
+ *          careful parsing.  For example, in this file, we have
+ *          left and right braces as characters, and these must not
+ *          be counted.  Somewhat more tricky, the file fhmtauto.c
+ *          generates code, and includes a right brace in a string.
+ *          So we must not include braces that are in strings.  But how
+ *          do we know if something is inside a string?  Keep state,
+ *          starting with not-inside, and every time you hit a double quote
+ *          that is not escaped, toggle the condition.  Any brace
+ *          found in the state of being within a string is ignored.
+ *      (6) When a prototype is extracted, it is put in a canonical
+ *          form (i.e., cleaned up).  Finally, we check that it is
+ *          not static and save it.  (If static, it is ignored).
+ *      (7) The %prestring for unix is NULL; it is included here so that
+ *          you can use Microsoft's declaration for importing or
+ *          exporting to a dll.  See environ.h for examples of use.
+ *          Here, we set: %prestring = "LEPT_DLL ".  Note in particular
+ *          the space character that will separate 'LEPT_DLL' from
+ *          the standard unix prototype that follows.
+ * </pre>
+ */
+char *
+parseForProtos(const char *filein,
+               const char *prestring)
+{
+char    *strdata, *str, *newstr, *parsestr, *secondword;
+l_int32  start, next, stop, charindex, found;
+size_t   nbytes;
+SARRAY  *sa, *saout, *satest;
+
+    if (!filein)
+        return (char *)ERROR_PTR("filein not defined", __func__, NULL);
+
+        /* Read in the cpp output into memory, one string for each
+         * line in the file, omitting blank lines.  */
+    strdata = (char *)l_binaryRead(filein, &nbytes);
+    sa = sarrayCreateLinesFromString(strdata, 0);
+
+    saout = sarrayCreate(0);
+    next = 0;
+    while (1) {  /* repeat after each non-static prototype is extracted */
+        searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
+        if (!found)
+            break;
+/*        lept_stderr("  start = %d, stop = %d, charindex = %d\n",
+                      start, stop, charindex); */
+        str = captureProtoSignature(sa, start, stop, charindex);
+
+            /* Make sure that the signature found by cpp does not begin with
+             * static, extern or typedef.  We get 'extern' declarations
+             * from header files, and with some versions of cpp running on
+             * #include <sys/stat.h> we get something of the form:
+             *    extern ... (( ... )) ... ( ... ) { ...
+             * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
+             * and there is a lot of garbage between the rp and the lp.
+             * It is easiest to simply reject any signature that starts
+             * with 'extern'.  Note also that an 'extern' token has been
+             * prepended to each prototype, so the 'static' or
+             * 'extern' keywords we are looking for, if they exist,
+             * would be the second word.  We also have a typedef in
+             * bmpio.c that has the form:
+             *    typedef struct __attribute__((....)) { ...} ... ;
+             * This is avoided by blacklisting 'typedef' along with 'extern'
+             * and 'static'. */
+        satest = sarrayCreateWordsFromString(str);
+        secondword = sarrayGetString(satest, 1, L_NOCOPY);
+        if (strcmp(secondword, "static") &&  /* not static */
+            strcmp(secondword, "extern") &&  /* not extern */
+            strcmp(secondword, "typedef")) {  /* not typedef */
+            if (prestring) {  /* prepend it to the prototype */
+                newstr = stringJoin(prestring, str);
+                sarrayAddString(saout, newstr, L_INSERT);
+                LEPT_FREE(str);
+            } else {
+                sarrayAddString(saout, str, L_INSERT);
+            }
+        } else {
+            LEPT_FREE(str);
+        }
+        sarrayDestroy(&satest);
+
+        skipToEndOfFunction(sa, stop, charindex, &next);
+        if (next == -1) break;
+    }
+
+        /* Flatten into a string with newlines between prototypes */
+    parsestr = sarrayToString(saout, 1);
+    LEPT_FREE(strdata);
+    sarrayDestroy(&sa);
+    sarrayDestroy(&saout);
+
+    return parsestr;
+}
+
+
+/*
+ * \brief  getNextNonCommentLine()
+ *
+ * \param[in]   sa      output from cpp, by line)
+ * \param[in]   start   starting index to search)
+ * \param[out]  pnext   index of first uncommented line after the start line
+ * \return  0 if OK, o on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Skips over all consecutive comment lines, beginning at 'start'
+ *      (2) If all lines to the end are '#' comments, return next = -1
+ * </pre>
+ */
+static l_int32
+getNextNonCommentLine(SARRAY  *sa,
+                      l_int32  start,
+                      l_int32 *pnext)
+{
+char    *str;
+l_int32  i, n;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pnext)
+        return ERROR_INT("&pnext not defined", __func__, 1);
+
+        /* Init for situation where this line and all following are comments */
+    *pnext = -1;
+
+    n = sarrayGetCount(sa);
+    for (i = start; i < n; i++) {
+        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
+            return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
+        if (str[0] != '#') {
+            *pnext = i;
+            return 0;
+        }
+    }
+
+    return 0;
+}
+
+
+/*
+ * \brief  getNextNonBlankLine()
+ *
+ * \param[in]    sa      output from cpp, by line
+ * \param[in]    start   starting index to search
+ * \param[out]   pnext   index of first nonblank line after the start line
+ * \return   0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Skips over all consecutive blank lines, beginning at 'start'
+ *      (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
+ *      (3) If all lines to the end are blank, return next = -1
+ * </pre>
+ */
+static l_int32
+getNextNonBlankLine(SARRAY  *sa,
+                    l_int32  start,
+                    l_int32 *pnext)
+{
+char    *str;
+l_int32  i, j, n, len;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pnext)
+        return ERROR_INT("&pnext not defined", __func__, 1);
+
+        /* Init for situation where this line and all following are blank */
+    *pnext = -1;
+
+    n = sarrayGetCount(sa);
+    for (i = start; i < n; i++) {
+        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
+            return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
+        len = strlen(str);
+        for (j = 0; j < len; j++) {
+            if (str[j] != ' ' && str[j] != '\t'
+                && str[j] != '\n' && str[j] != '\r') {  /* non-blank */
+                *pnext = i;
+                return 0;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+/*
+ * \brief  getNextNonDoubleSlashLine()
+ *
+ * \param[in]     sa      output from cpp, by line
+ * \param[in]     start   starting index to search
+ * \param[out]    pnext   index of first uncommented line after the start line
+ * \return   0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Skips over all consecutive '//' lines, beginning at 'start'
+ *      (2) If all lines to the end start with '//', return next = -1
+ * </pre>
+ */
+static l_int32
+getNextNonDoubleSlashLine(SARRAY  *sa,
+                          l_int32  start,
+                          l_int32 *pnext)
+{
+char    *str;
+l_int32  i, n, len;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pnext)
+        return ERROR_INT("&pnext not defined", __func__, 1);
+
+        /* Init for situation where this line and all following
+         * start with '//' */
+    *pnext = -1;
+
+    n = sarrayGetCount(sa);
+    for (i = start; i < n; i++) {
+        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
+            return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
+        len = strlen(str);
+        if (len < 2 || str[0] != '/' || str[1] != '/') {
+            *pnext = i;
+            return 0;
+        }
+    }
+
+    return 0;
+}
+
+
+/*
+ * \brief  searchForProtoSignature()
+ *
+ * \param[in]     sa           output from cpp, by line
+ * \param[in]     begin        beginning index to search
+ * \param[out]    pstart       starting index for function definition
+ * \param[out]    pstop        index of line on which proto is completed
+ * \param[out]    pcharindex   char index of completing ')' character
+ * \param[out]    pfound       1 if valid signature is found; 0 otherwise
+ * \return   0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If this returns found == 0, it means that there are no
+ *          more function definitions in the file.  Caller must check
+ *          this value and exit the loop over the entire cpp file.
+ *      (2) This follows plan 3 (see above).  We skip comment and blank
+ *          lines at the beginning.  Then we don't check for keywords.
+ *          Instead, find the relative locations of the first occurrences
+ *          of these four tokens: left parenthesis (lp), right
+ *          parenthesis (rp), left brace (lb) and semicolon (sc).
+ *      (3) The signature of a function definition looks like this:
+ *               .... '(' .... ')' '{'
+ *          where the lp and rp must both precede the lb, with only
+ *          whitespace between the rp and the lb.  The '....'
+ *          are sets of tokens that have no braces.
+ *      (4) If a function definition is found, this returns found = 1,
+ *          with 'start' being the first line of the definition and
+ *          'charindex' being the position of the ')' in line 'stop'
+ *          at the end of the arg list.
+ * </pre>
+ */
+static l_int32
+searchForProtoSignature(SARRAY   *sa,
+                        l_int32   begin,
+                        l_int32  *pstart,
+                        l_int32  *pstop,
+                        l_int32  *pcharindex,
+                        l_int32  *pfound)
+{
+l_int32  next, rbline, rbindex, scline;
+l_int32  soffsetlp, soffsetrp, soffsetlb, soffsetsc;
+l_int32  boffsetlp, boffsetrp, boffsetlb, boffsetsc;
+l_int32  toffsetlp, toffsetrp, toffsetlb, toffsetsc;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pstart)
+        return ERROR_INT("&start not defined", __func__, 1);
+    if (!pstop)
+        return ERROR_INT("&stop not defined", __func__, 1);
+    if (!pcharindex)
+        return ERROR_INT("&charindex not defined", __func__, 1);
+    if (!pfound)
+        return ERROR_INT("&found not defined", __func__, 1);
+
+    *pfound = FALSE;
+
+    while (1) {
+
+            /* Skip over sequential '#' comment lines */
+        getNextNonCommentLine(sa, begin, &next);
+        if (next == -1) return 0;
+        if (next != begin) {
+            begin = next;
+            continue;
+        }
+
+            /* Skip over sequential blank lines */
+        getNextNonBlankLine(sa, begin, &next);
+        if (next == -1) return 0;
+        if (next != begin) {
+            begin = next;
+            continue;
+        }
+
+            /* Skip over sequential lines starting with '//' */
+        getNextNonDoubleSlashLine(sa, begin, &next);
+        if (next == -1) return 0;
+        if (next != begin) {
+            begin = next;
+            continue;
+        }
+
+            /* Search for specific character sequence patterns; namely
+             * a lp, a matching rp, a lb and a semicolon.
+             * Abort the search if no lp is found. */
+        getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
+                              &toffsetlp);
+        if (soffsetlp == -1)
+            break;
+        getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
+                               &soffsetrp, &boffsetrp, &toffsetrp);
+        getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
+                              &toffsetlb);
+        getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
+                              &toffsetsc);
+
+            /* We've found a lp.  Now weed out the case where a matching
+             * rp and a lb are not both found. */
+        if (soffsetrp == -1 || soffsetlb == -1)
+            break;
+
+            /* Check if a left brace occurs before a left parenthesis;
+             * if so, skip it */
+        if (toffsetlb < toffsetlp) {
+            skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
+                &rbline, &rbindex);
+            skipToSemicolon(sa, rbline, rbindex, &scline);
+            begin = scline + 1;
+            continue;
+        }
+
+            /* Check if a semicolon occurs before a left brace or
+             * a left parenthesis; if so, skip it */
+        if ((soffsetsc != -1) &&
+            (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {
+            skipToSemicolon(sa, next, 0, &scline);
+            begin = scline + 1;
+            continue;
+        }
+
+            /* OK, it should be a function definition.  We haven't
+             * checked that there is only white space between the
+             * rp and lb, but we've only seen problems with two
+             * extern inlines in sys/stat.h, and this is handled
+             * later by eliminating any prototype beginning with 'extern'. */
+        *pstart = next;
+        *pstop = next + soffsetrp;
+        *pcharindex = boffsetrp;
+        *pfound = TRUE;
+        break;
+    }
+
+    return 0;
+}
+
+
+/*
+ * \brief  captureProtoSignature()
+ *
+ * \param[in]    sa          output from cpp, by line
+ * \param[in]    start       starting index to search; never a comment line
+ * \param[in]    stop        index of line on which pattern is completed
+ * \param[in]    charindex   char index of completing ')' character
+ * \return  cleanstr   prototype string, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Return all characters, ending with a ';' after the ')'
+ * </pre>
+ */
+static char *
+captureProtoSignature(SARRAY  *sa,
+                      l_int32  start,
+                      l_int32  stop,
+                      l_int32  charindex)
+{
+char    *str, *newstr, *protostr, *cleanstr;
+SARRAY  *sap;
+l_int32  i;
+
+    if (!sa)
+        return (char *)ERROR_PTR("sa not defined", __func__, NULL);
+
+    sap = sarrayCreate(0);
+    for (i = start; i < stop; i++) {
+        str = sarrayGetString(sa, i, L_COPY);
+        sarrayAddString(sap, str, L_INSERT);
+    }
+    str = sarrayGetString(sa, stop, L_COPY);
+    str[charindex + 1] = '\0';
+    newstr = stringJoin(str, ";");
+    sarrayAddString(sap, newstr, L_INSERT);
+    LEPT_FREE(str);
+    protostr = sarrayToString(sap, 2);
+    sarrayDestroy(&sap);
+    cleanstr = cleanProtoSignature(protostr);
+    LEPT_FREE(protostr);
+
+    return cleanstr;
+}
+
+
+/*
+ * \brief  cleanProtoSignature()
+ *
+ * \param[in]   instr  input prototype string
+ * \return  cleanstr   clean prototype string, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Adds 'extern' at beginning and regularizes spaces
+ *          between tokens.
+ * </pre>
+ */
+static char *
+cleanProtoSignature(char *instr)
+{
+char    *str, *cleanstr;
+char     buf[L_BUF_SIZE];
+char     externstring[] = "extern";
+l_int32  i, j, nwords, nchars, index, len;
+SARRAY  *sa, *saout;
+
+    if (!instr)
+        return (char *)ERROR_PTR("instr not defined", __func__, NULL);
+
+    sa = sarrayCreateWordsFromString(instr);
+    nwords = sarrayGetCount(sa);
+    saout = sarrayCreate(0);
+    sarrayAddString(saout, externstring, L_COPY);
+    for (i = 0; i < nwords; i++) {
+        str = sarrayGetString(sa, i, L_NOCOPY);
+        nchars = strlen(str);
+        index = 0;
+        for (j = 0; j < nchars; j++) {
+            if (index > L_BUF_SIZE - 6) {
+                sarrayDestroy(&sa);
+                sarrayDestroy(&saout);
+                return (char *)ERROR_PTR("token too large", __func__, NULL);
+            }
+            if (str[j] == '(') {
+                buf[index++] = ' ';
+                buf[index++] = '(';
+                buf[index++] = ' ';
+            } else if (str[j] == ')') {
+                buf[index++] = ' ';
+                buf[index++] = ')';
+            } else {
+                buf[index++] = str[j];
+            }
+        }
+        buf[index] = '\0';
+        sarrayAddString(saout, buf, L_COPY);
+    }
+
+        /* Flatten to a prototype string with spaces added after
+         * each word, and remove the last space */
+    cleanstr = sarrayToString(saout, 2);
+    len = strlen(cleanstr);
+    cleanstr[len - 1] = '\0';
+
+    sarrayDestroy(&sa);
+    sarrayDestroy(&saout);
+    return cleanstr;
+}
+
+
+/*
+ * \brief  skipToEndOfFunction()
+ *
+ * \param[in]    sa        output from cpp, by line
+ * \param[in]    start     index of starting line with left bracket to search
+ * \param[in]    lbindex   starting char index for left bracket
+ * \param[out]   pnext     index of line following the ending '}' for function
+ * \return  0 if OK, 1 on error
+ */
+static l_int32
+skipToEndOfFunction(SARRAY   *sa,
+                    l_int32   start,
+                    l_int32   lbindex,
+                    l_int32  *pnext)
+{
+l_int32  end, rbindex;
+l_int32 soffsetlb, boffsetlb, toffsetlb;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pnext)
+        return ERROR_INT("&next not defined", __func__, 1);
+
+    getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
+                &toffsetlb);
+    skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
+    if (end == -1) {  /* shouldn't happen! */
+        *pnext = -1;
+        return 1;
+    }
+
+    *pnext = end + 1;
+    return 0;
+}
+
+
+/*
+ * \brief  skipToMatchingBrace()
+ *
+ * \param[in]    sa         output from cpp, by line
+ * \param[in]    start      index of starting line with left bracket to search
+ * \param[in]    lbindex    starting char index for left bracket
+ * \param[out]   pstop      index of line with the matching right bracket
+ * \param[out]   prbindex   char index of matching right bracket
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If the matching right brace is not found, returns
+ *          stop = -1.  This shouldn't happen.
+ * </pre>
+ */
+static l_int32
+skipToMatchingBrace(SARRAY   *sa,
+                    l_int32   start,
+                    l_int32   lbindex,
+                    l_int32  *pstop,
+                    l_int32  *prbindex)
+{
+char    *str;
+l_int32  i, j, jstart, n, sumbrace, found, instring, nchars;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pstop)
+        return ERROR_INT("&stop not defined", __func__, 1);
+    if (!prbindex)
+        return ERROR_INT("&rbindex not defined", __func__, 1);
+
+    instring = 0;  /* init to FALSE; toggle on double quotes */
+    *pstop = -1;
+    n = sarrayGetCount(sa);
+    sumbrace = 1;
+    found = FALSE;
+    for (i = start; i < n; i++) {
+        str = sarrayGetString(sa, i, L_NOCOPY);
+        jstart = 0;
+        if (i == start)
+            jstart = lbindex + 1;
+        nchars = strlen(str);
+        for (j = jstart; j < nchars; j++) {
+                /* Toggle the instring state every time you encounter
+                 * a double quote that is NOT escaped. */
+            if (j == jstart && str[j] == '\"')
+                instring = 1 - instring;
+            if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
+                instring = 1 - instring;
+                /* Record the braces if they are neither a literal character
+                 * nor within a string. */
+            if (str[j] == '{' && str[j+1] != '\'' && !instring) {
+                sumbrace++;
+            } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
+                sumbrace--;
+                if (sumbrace == 0) {
+                    found = TRUE;
+                    *prbindex = j;
+                    break;
+                }
+            }
+        }
+        if (found) {
+            *pstop = i;
+            return 0;
+        }
+    }
+
+    return ERROR_INT("matching right brace not found", __func__, 1);
+}
+
+
+/*
+ * \brief  skipToSemicolon()
+ *
+ * \param[in]     sa          output from cpp, by line
+ * \param[in]     start       index of starting line to search
+ * \param[in]     charindex   starting char index for search
+ * \param[out]    pnext       index of line containing the next ';'
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If the semicolon isn't found, returns next = -1.
+ *          This shouldn't happen.
+ *      (2) This is only used in contexts where the semicolon is
+ *          not within a string.
+ * </pre>
+ */
+static l_int32
+skipToSemicolon(SARRAY   *sa,
+                l_int32   start,
+                l_int32   charindex,
+                l_int32  *pnext)
+{
+char    *str;
+l_int32  i, j, n, jstart, nchars, found;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!pnext)
+        return ERROR_INT("&next not defined", __func__, 1);
+
+    *pnext = -1;
+    n = sarrayGetCount(sa);
+    found = FALSE;
+    for (i = start; i < n; i++) {
+        str = sarrayGetString(sa, i, L_NOCOPY);
+        jstart = 0;
+        if (i == start)
+            jstart = charindex + 1;
+        nchars = strlen(str);
+        for (j = jstart; j < nchars; j++) {
+            if (str[j] == ';') {
+                found = TRUE;;
+                break;
+            }
+        }
+        if (found) {
+            *pnext = i;
+            return 0;
+        }
+    }
+
+    return ERROR_INT("semicolon not found", __func__, 1);
+}
+
+
+/*
+ * \brief  getOffsetForCharacter()
+ *
+ * \param[in]    sa         output from cpp, by line
+ * \param[in]    start      starting index in sa to search;
+ *                          never a comment line
+ * \param[in]    tchar      we are searching for the first instance of this
+ * \param[out]   psoffset   offset in strings from start index
+ * \param[out]   pboffset   offset in bytes within string in which
+ *                          the character is first found
+ * \param[out]   ptoffset   offset in total bytes from beginning of string
+ *                          indexed by 'start' to the location where
+ *                          the character is first found
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) We are searching for the first instance of 'tchar', starting
+ *          at the beginning of the string indexed by start.
+ *      (2) If the character is not found, soffset is returned as -1,
+ *          and the other offsets are set to very large numbers.  The
+ *          caller must check the value of soffset.
+ *      (3) This is only used in contexts where it is not necessary to
+ *          consider if the character is inside a string.
+ * </pre>
+ */
+static l_int32
+getOffsetForCharacter(SARRAY   *sa,
+                      l_int32   start,
+                      char      tchar,
+                      l_int32  *psoffset,
+                      l_int32  *pboffset,
+                      l_int32  *ptoffset)
+{
+char    *str;
+l_int32  i, j, n, nchars, totchars, found;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!psoffset)
+        return ERROR_INT("&soffset not defined", __func__, 1);
+    if (!pboffset)
+        return ERROR_INT("&boffset not defined", __func__, 1);
+    if (!ptoffset)
+        return ERROR_INT("&toffset not defined", __func__, 1);
+
+    *psoffset = -1;  /* init to not found */
+    *pboffset = 100000000;
+    *ptoffset = 100000000;
+
+    n = sarrayGetCount(sa);
+    found = FALSE;
+    totchars = 0;
+    for (i = start; i < n; i++) {
+        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
+            return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
+        nchars = strlen(str);
+        for (j = 0; j < nchars; j++) {
+            if (str[j] == tchar) {
+                found = TRUE;
+                break;
+            }
+        }
+        if (found)
+            break;
+        totchars += nchars;
+    }
+
+    if (found) {
+        *psoffset = i - start;
+        *pboffset = j;
+        *ptoffset = totchars + j;
+    }
+
+    return 0;
+}
+
+
+/*
+ * \brief  getOffsetForMatchingRP()
+ *
+ * \param[in]    sa          output from cpp, by line
+ * \param[in]    start       starting index in sa to search;
+ *                           never a comment line
+ * \param[in]    soffsetlp   string offset to first LP
+ * \param[in]    boffsetlp   byte offset within string to first LP
+ * \param[in]    toffsetlp   total byte offset to first LP
+ * \param[out]   psoffset    offset in strings from start index
+ * \param[out]   pboffset    offset in bytes within string in which
+ *                           the matching RP is found
+ * \param[out]   ptoffset    offset in total bytes from beginning of string
+ *                           indexed by 'start' to the location where
+ *                           the matching RP is found
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) We are searching for the matching right parenthesis (RP) that
+ *          corresponds to the first LP found beginning at the string
+ *          indexed by start.
+ *      (2) If the matching RP is not found, soffset is returned as -1,
+ *          and the other offsets are set to very large numbers.  The
+ *          caller must check the value of soffset.
+ *      (3) This is only used in contexts where it is not necessary to
+ *          consider if the character is inside a string.
+ *      (4) We must do this because although most arg lists have a single
+ *          left and right parenthesis, it is possible to construct
+ *          more complicated prototype declarations, such as those
+ *          where functions are passed in.  The C++ rules for prototypes
+ *          are strict, and require that for functions passed in as args,
+ *          the function name arg be placed in parenthesis, as well
+ *          as its arg list, thus incurring two extra levels of parentheses.
+ * </pre>
+ */
+static l_int32
+getOffsetForMatchingRP(SARRAY   *sa,
+                       l_int32   start,
+                       l_int32   soffsetlp,
+                       l_int32   boffsetlp,
+                       l_int32   toffsetlp,
+                       l_int32  *psoffset,
+                       l_int32  *pboffset,
+                       l_int32  *ptoffset)
+{
+char    *str;
+l_int32  i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
+
+    if (!sa)
+        return ERROR_INT("sa not defined", __func__, 1);
+    if (!psoffset)
+        return ERROR_INT("&soffset not defined", __func__, 1);
+    if (!pboffset)
+        return ERROR_INT("&boffset not defined", __func__, 1);
+    if (!ptoffset)
+        return ERROR_INT("&toffset not defined", __func__, 1);
+
+    *psoffset = -1;  /* init to not found */
+    *pboffset = 100000000;
+    *ptoffset = 100000000;
+
+    n = sarrayGetCount(sa);
+    found = FALSE;
+    totchars = toffsetlp;
+    leftmatch = 1;  /* count of (LP - RP); we're finished when it goes to 0. */
+    firstline = start + soffsetlp;
+    for (i = firstline; i < n; i++) {
+        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
+            return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
+        nchars = strlen(str);
+        jstart = 0;
+        if (i == firstline)
+            jstart = boffsetlp + 1;
+        for (j = jstart; j < nchars; j++) {
+            if (str[j] == '(')
+                leftmatch++;
+            else if (str[j] == ')')
+                leftmatch--;
+            if (leftmatch == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+        if (found)
+            break;
+        if (i == firstline)
+            totchars += nchars - boffsetlp;
+        else
+            totchars += nchars;
+    }
+
+    if (found) {
+        *psoffset = i - start;
+        *pboffset = j;
+        *ptoffset = totchars + j;
+    }
+
+    return 0;
+}