comparison mupdf-source/thirdparty/leptonica/src/parseprotos.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*
28 * \file parseprotos.c
29 * <pre>
30 *
31 * char *parseForProtos()
32 *
33 * Static helpers
34 * static l_int32 getNextNonCommentLine()
35 * static l_int32 getNextNonBlankLine()
36 * static l_int32 getNextNonDoubleSlashLine()
37 * static l_int32 searchForProtoSignature()
38 * static char *captureProtoSignature()
39 * static char *cleanProtoSignature()
40 * static l_int32 skipToEndOfFunction()
41 * static l_int32 skipToMatchingBrace()
42 * static l_int32 skipToSemicolon()
43 * static l_int32 getOffsetForCharacter()
44 * static l_int32 getOffsetForMatchingRP()
45 * </pre>
46 */
47
48 #ifdef HAVE_CONFIG_H
49 #include <config_auto.h>
50 #endif /* HAVE_CONFIG_H */
51
52 #include <string.h>
53 #include "allheaders.h"
54
55 #define L_BUF_SIZE 2048 /* max token size */
56
57 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
58 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
59 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
60 l_int32 *pnext);
61 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
62 l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
63 l_int32 *pfound);
64 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
65 l_int32 charindex);
66 static char * cleanProtoSignature(char *str);
67 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
68 l_int32 charindex, l_int32 *pnext);
69 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
70 l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
71 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
72 l_int32 charindex, l_int32 *pnext);
73 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
74 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
75 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
76 l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
77 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
78
79
80 /*
81 * \brief parseForProtos()
82 *
83 * \param[in] filein output of cpp
84 * \param[in] prestring [optional] string that prefaces each decl;
85 * use NULL to omit
86 * \return parsestr string of function prototypes, or NULL on error
87 *
88 * <pre>
89 * Notes:
90 * (1) We parse the output of cpp:
91 * cpp -ansi <filein>
92 * Three plans were attempted, with success on the third.
93 * (2) Plan 1. A cursory examination of the cpp output indicated that
94 * every function was preceded by a cpp comment statement.
95 * So we just need to look at statements beginning after comments.
96 * Unfortunately, this is NOT the case. Some functions start
97 * without cpp comment lines, typically when there are no
98 * comments in the source that immediately precede the function.
99 * (3) Plan 2. Consider the keywords in the language that start
100 * parts of the cpp file. Some, like 'enum', 'union' and
101 * 'struct', are followed after a while by '{', and eventually
102 * end with '}', plus an optional token and a final ';'.
103 * Others, like 'extern', 'static' and 'typedef', are never
104 * the beginnings of global function definitions. Function
105 * prototypes have one or more sets of '(' followed eventually
106 * by a ')', and end with ';'. But function definitions have
107 * tokens, followed by '(', more tokens, ')' and then
108 * immediately a '{'. We would generate a prototype from this
109 * by adding a ';' to all tokens up to the ')'. So we use
110 * these special tokens to decide what we are parsing. And
111 * whenever a function definition is found and the prototype
112 * extracted, we skip through the rest of the function
113 * past the corresponding '}'. This token ends a line, and
114 * is often on a line of its own. But as it turns out,
115 * the only keyword we need to consider is 'static'.
116 * (4) Plan 3. Consider the parentheses and braces for various
117 * declarations. A struct, enum, or union has a pair of
118 * braces followed by a semicolon. With the exception of an
119 * __attribute__ declaration for a struct, they cannot have parentheses
120 * before the left brace, but a struct can have lots of parentheses
121 * within the brace set. A function prototype has no braces.
122 * A function definition can have sets of left and right
123 * parentheses, but these are followed by a left brace.
124 * So plan 3 looks at the way parentheses and braces are
125 * organized. Once the beginning of a function definition
126 * is found, the prototype is extracted and we search for
127 * the ending right brace.
128 * (5) To find the ending right brace, it is necessary to do some
129 * careful parsing. For example, in this file, we have
130 * left and right braces as characters, and these must not
131 * be counted. Somewhat more tricky, the file fhmtauto.c
132 * generates code, and includes a right brace in a string.
133 * So we must not include braces that are in strings. But how
134 * do we know if something is inside a string? Keep state,
135 * starting with not-inside, and every time you hit a double quote
136 * that is not escaped, toggle the condition. Any brace
137 * found in the state of being within a string is ignored.
138 * (6) When a prototype is extracted, it is put in a canonical
139 * form (i.e., cleaned up). Finally, we check that it is
140 * not static and save it. (If static, it is ignored).
141 * (7) The %prestring for unix is NULL; it is included here so that
142 * you can use Microsoft's declaration for importing or
143 * exporting to a dll. See environ.h for examples of use.
144 * Here, we set: %prestring = "LEPT_DLL ". Note in particular
145 * the space character that will separate 'LEPT_DLL' from
146 * the standard unix prototype that follows.
147 * </pre>
148 */
149 char *
150 parseForProtos(const char *filein,
151 const char *prestring)
152 {
153 char *strdata, *str, *newstr, *parsestr, *secondword;
154 l_int32 start, next, stop, charindex, found;
155 size_t nbytes;
156 SARRAY *sa, *saout, *satest;
157
158 if (!filein)
159 return (char *)ERROR_PTR("filein not defined", __func__, NULL);
160
161 /* Read in the cpp output into memory, one string for each
162 * line in the file, omitting blank lines. */
163 strdata = (char *)l_binaryRead(filein, &nbytes);
164 sa = sarrayCreateLinesFromString(strdata, 0);
165
166 saout = sarrayCreate(0);
167 next = 0;
168 while (1) { /* repeat after each non-static prototype is extracted */
169 searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
170 if (!found)
171 break;
172 /* lept_stderr(" start = %d, stop = %d, charindex = %d\n",
173 start, stop, charindex); */
174 str = captureProtoSignature(sa, start, stop, charindex);
175
176 /* Make sure that the signature found by cpp does not begin with
177 * static, extern or typedef. We get 'extern' declarations
178 * from header files, and with some versions of cpp running on
179 * #include <sys/stat.h> we get something of the form:
180 * extern ... (( ... )) ... ( ... ) { ...
181 * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
182 * and there is a lot of garbage between the rp and the lp.
183 * It is easiest to simply reject any signature that starts
184 * with 'extern'. Note also that an 'extern' token has been
185 * prepended to each prototype, so the 'static' or
186 * 'extern' keywords we are looking for, if they exist,
187 * would be the second word. We also have a typedef in
188 * bmpio.c that has the form:
189 * typedef struct __attribute__((....)) { ...} ... ;
190 * This is avoided by blacklisting 'typedef' along with 'extern'
191 * and 'static'. */
192 satest = sarrayCreateWordsFromString(str);
193 secondword = sarrayGetString(satest, 1, L_NOCOPY);
194 if (strcmp(secondword, "static") && /* not static */
195 strcmp(secondword, "extern") && /* not extern */
196 strcmp(secondword, "typedef")) { /* not typedef */
197 if (prestring) { /* prepend it to the prototype */
198 newstr = stringJoin(prestring, str);
199 sarrayAddString(saout, newstr, L_INSERT);
200 LEPT_FREE(str);
201 } else {
202 sarrayAddString(saout, str, L_INSERT);
203 }
204 } else {
205 LEPT_FREE(str);
206 }
207 sarrayDestroy(&satest);
208
209 skipToEndOfFunction(sa, stop, charindex, &next);
210 if (next == -1) break;
211 }
212
213 /* Flatten into a string with newlines between prototypes */
214 parsestr = sarrayToString(saout, 1);
215 LEPT_FREE(strdata);
216 sarrayDestroy(&sa);
217 sarrayDestroy(&saout);
218
219 return parsestr;
220 }
221
222
223 /*
224 * \brief getNextNonCommentLine()
225 *
226 * \param[in] sa output from cpp, by line)
227 * \param[in] start starting index to search)
228 * \param[out] pnext index of first uncommented line after the start line
229 * \return 0 if OK, o on error
230 *
231 * <pre>
232 * Notes:
233 * (1) Skips over all consecutive comment lines, beginning at 'start'
234 * (2) If all lines to the end are '#' comments, return next = -1
235 * </pre>
236 */
237 static l_int32
238 getNextNonCommentLine(SARRAY *sa,
239 l_int32 start,
240 l_int32 *pnext)
241 {
242 char *str;
243 l_int32 i, n;
244
245 if (!sa)
246 return ERROR_INT("sa not defined", __func__, 1);
247 if (!pnext)
248 return ERROR_INT("&pnext not defined", __func__, 1);
249
250 /* Init for situation where this line and all following are comments */
251 *pnext = -1;
252
253 n = sarrayGetCount(sa);
254 for (i = start; i < n; i++) {
255 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
256 return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
257 if (str[0] != '#') {
258 *pnext = i;
259 return 0;
260 }
261 }
262
263 return 0;
264 }
265
266
267 /*
268 * \brief getNextNonBlankLine()
269 *
270 * \param[in] sa output from cpp, by line
271 * \param[in] start starting index to search
272 * \param[out] pnext index of first nonblank line after the start line
273 * \return 0 if OK, 1 on error
274 *
275 * <pre>
276 * Notes:
277 * (1) Skips over all consecutive blank lines, beginning at 'start'
278 * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
279 * (3) If all lines to the end are blank, return next = -1
280 * </pre>
281 */
282 static l_int32
283 getNextNonBlankLine(SARRAY *sa,
284 l_int32 start,
285 l_int32 *pnext)
286 {
287 char *str;
288 l_int32 i, j, n, len;
289
290 if (!sa)
291 return ERROR_INT("sa not defined", __func__, 1);
292 if (!pnext)
293 return ERROR_INT("&pnext not defined", __func__, 1);
294
295 /* Init for situation where this line and all following are blank */
296 *pnext = -1;
297
298 n = sarrayGetCount(sa);
299 for (i = start; i < n; i++) {
300 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
301 return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
302 len = strlen(str);
303 for (j = 0; j < len; j++) {
304 if (str[j] != ' ' && str[j] != '\t'
305 && str[j] != '\n' && str[j] != '\r') { /* non-blank */
306 *pnext = i;
307 return 0;
308 }
309 }
310 }
311
312 return 0;
313 }
314
315
316 /*
317 * \brief getNextNonDoubleSlashLine()
318 *
319 * \param[in] sa output from cpp, by line
320 * \param[in] start starting index to search
321 * \param[out] pnext index of first uncommented line after the start line
322 * \return 0 if OK, 1 on error
323 *
324 * <pre>
325 * Notes:
326 * (1) Skips over all consecutive '//' lines, beginning at 'start'
327 * (2) If all lines to the end start with '//', return next = -1
328 * </pre>
329 */
330 static l_int32
331 getNextNonDoubleSlashLine(SARRAY *sa,
332 l_int32 start,
333 l_int32 *pnext)
334 {
335 char *str;
336 l_int32 i, n, len;
337
338 if (!sa)
339 return ERROR_INT("sa not defined", __func__, 1);
340 if (!pnext)
341 return ERROR_INT("&pnext not defined", __func__, 1);
342
343 /* Init for situation where this line and all following
344 * start with '//' */
345 *pnext = -1;
346
347 n = sarrayGetCount(sa);
348 for (i = start; i < n; i++) {
349 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
350 return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
351 len = strlen(str);
352 if (len < 2 || str[0] != '/' || str[1] != '/') {
353 *pnext = i;
354 return 0;
355 }
356 }
357
358 return 0;
359 }
360
361
362 /*
363 * \brief searchForProtoSignature()
364 *
365 * \param[in] sa output from cpp, by line
366 * \param[in] begin beginning index to search
367 * \param[out] pstart starting index for function definition
368 * \param[out] pstop index of line on which proto is completed
369 * \param[out] pcharindex char index of completing ')' character
370 * \param[out] pfound 1 if valid signature is found; 0 otherwise
371 * \return 0 if OK, 1 on error
372 *
373 * <pre>
374 * Notes:
375 * (1) If this returns found == 0, it means that there are no
376 * more function definitions in the file. Caller must check
377 * this value and exit the loop over the entire cpp file.
378 * (2) This follows plan 3 (see above). We skip comment and blank
379 * lines at the beginning. Then we don't check for keywords.
380 * Instead, find the relative locations of the first occurrences
381 * of these four tokens: left parenthesis (lp), right
382 * parenthesis (rp), left brace (lb) and semicolon (sc).
383 * (3) The signature of a function definition looks like this:
384 * .... '(' .... ')' '{'
385 * where the lp and rp must both precede the lb, with only
386 * whitespace between the rp and the lb. The '....'
387 * are sets of tokens that have no braces.
388 * (4) If a function definition is found, this returns found = 1,
389 * with 'start' being the first line of the definition and
390 * 'charindex' being the position of the ')' in line 'stop'
391 * at the end of the arg list.
392 * </pre>
393 */
394 static l_int32
395 searchForProtoSignature(SARRAY *sa,
396 l_int32 begin,
397 l_int32 *pstart,
398 l_int32 *pstop,
399 l_int32 *pcharindex,
400 l_int32 *pfound)
401 {
402 l_int32 next, rbline, rbindex, scline;
403 l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc;
404 l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc;
405 l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc;
406
407 if (!sa)
408 return ERROR_INT("sa not defined", __func__, 1);
409 if (!pstart)
410 return ERROR_INT("&start not defined", __func__, 1);
411 if (!pstop)
412 return ERROR_INT("&stop not defined", __func__, 1);
413 if (!pcharindex)
414 return ERROR_INT("&charindex not defined", __func__, 1);
415 if (!pfound)
416 return ERROR_INT("&found not defined", __func__, 1);
417
418 *pfound = FALSE;
419
420 while (1) {
421
422 /* Skip over sequential '#' comment lines */
423 getNextNonCommentLine(sa, begin, &next);
424 if (next == -1) return 0;
425 if (next != begin) {
426 begin = next;
427 continue;
428 }
429
430 /* Skip over sequential blank lines */
431 getNextNonBlankLine(sa, begin, &next);
432 if (next == -1) return 0;
433 if (next != begin) {
434 begin = next;
435 continue;
436 }
437
438 /* Skip over sequential lines starting with '//' */
439 getNextNonDoubleSlashLine(sa, begin, &next);
440 if (next == -1) return 0;
441 if (next != begin) {
442 begin = next;
443 continue;
444 }
445
446 /* Search for specific character sequence patterns; namely
447 * a lp, a matching rp, a lb and a semicolon.
448 * Abort the search if no lp is found. */
449 getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
450 &toffsetlp);
451 if (soffsetlp == -1)
452 break;
453 getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
454 &soffsetrp, &boffsetrp, &toffsetrp);
455 getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
456 &toffsetlb);
457 getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
458 &toffsetsc);
459
460 /* We've found a lp. Now weed out the case where a matching
461 * rp and a lb are not both found. */
462 if (soffsetrp == -1 || soffsetlb == -1)
463 break;
464
465 /* Check if a left brace occurs before a left parenthesis;
466 * if so, skip it */
467 if (toffsetlb < toffsetlp) {
468 skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
469 &rbline, &rbindex);
470 skipToSemicolon(sa, rbline, rbindex, &scline);
471 begin = scline + 1;
472 continue;
473 }
474
475 /* Check if a semicolon occurs before a left brace or
476 * a left parenthesis; if so, skip it */
477 if ((soffsetsc != -1) &&
478 (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {
479 skipToSemicolon(sa, next, 0, &scline);
480 begin = scline + 1;
481 continue;
482 }
483
484 /* OK, it should be a function definition. We haven't
485 * checked that there is only white space between the
486 * rp and lb, but we've only seen problems with two
487 * extern inlines in sys/stat.h, and this is handled
488 * later by eliminating any prototype beginning with 'extern'. */
489 *pstart = next;
490 *pstop = next + soffsetrp;
491 *pcharindex = boffsetrp;
492 *pfound = TRUE;
493 break;
494 }
495
496 return 0;
497 }
498
499
500 /*
501 * \brief captureProtoSignature()
502 *
503 * \param[in] sa output from cpp, by line
504 * \param[in] start starting index to search; never a comment line
505 * \param[in] stop index of line on which pattern is completed
506 * \param[in] charindex char index of completing ')' character
507 * \return cleanstr prototype string, or NULL on error
508 *
509 * <pre>
510 * Notes:
511 * (1) Return all characters, ending with a ';' after the ')'
512 * </pre>
513 */
514 static char *
515 captureProtoSignature(SARRAY *sa,
516 l_int32 start,
517 l_int32 stop,
518 l_int32 charindex)
519 {
520 char *str, *newstr, *protostr, *cleanstr;
521 SARRAY *sap;
522 l_int32 i;
523
524 if (!sa)
525 return (char *)ERROR_PTR("sa not defined", __func__, NULL);
526
527 sap = sarrayCreate(0);
528 for (i = start; i < stop; i++) {
529 str = sarrayGetString(sa, i, L_COPY);
530 sarrayAddString(sap, str, L_INSERT);
531 }
532 str = sarrayGetString(sa, stop, L_COPY);
533 str[charindex + 1] = '\0';
534 newstr = stringJoin(str, ";");
535 sarrayAddString(sap, newstr, L_INSERT);
536 LEPT_FREE(str);
537 protostr = sarrayToString(sap, 2);
538 sarrayDestroy(&sap);
539 cleanstr = cleanProtoSignature(protostr);
540 LEPT_FREE(protostr);
541
542 return cleanstr;
543 }
544
545
546 /*
547 * \brief cleanProtoSignature()
548 *
549 * \param[in] instr input prototype string
550 * \return cleanstr clean prototype string, or NULL on error
551 *
552 * <pre>
553 * Notes:
554 * (1) Adds 'extern' at beginning and regularizes spaces
555 * between tokens.
556 * </pre>
557 */
558 static char *
559 cleanProtoSignature(char *instr)
560 {
561 char *str, *cleanstr;
562 char buf[L_BUF_SIZE];
563 char externstring[] = "extern";
564 l_int32 i, j, nwords, nchars, index, len;
565 SARRAY *sa, *saout;
566
567 if (!instr)
568 return (char *)ERROR_PTR("instr not defined", __func__, NULL);
569
570 sa = sarrayCreateWordsFromString(instr);
571 nwords = sarrayGetCount(sa);
572 saout = sarrayCreate(0);
573 sarrayAddString(saout, externstring, L_COPY);
574 for (i = 0; i < nwords; i++) {
575 str = sarrayGetString(sa, i, L_NOCOPY);
576 nchars = strlen(str);
577 index = 0;
578 for (j = 0; j < nchars; j++) {
579 if (index > L_BUF_SIZE - 6) {
580 sarrayDestroy(&sa);
581 sarrayDestroy(&saout);
582 return (char *)ERROR_PTR("token too large", __func__, NULL);
583 }
584 if (str[j] == '(') {
585 buf[index++] = ' ';
586 buf[index++] = '(';
587 buf[index++] = ' ';
588 } else if (str[j] == ')') {
589 buf[index++] = ' ';
590 buf[index++] = ')';
591 } else {
592 buf[index++] = str[j];
593 }
594 }
595 buf[index] = '\0';
596 sarrayAddString(saout, buf, L_COPY);
597 }
598
599 /* Flatten to a prototype string with spaces added after
600 * each word, and remove the last space */
601 cleanstr = sarrayToString(saout, 2);
602 len = strlen(cleanstr);
603 cleanstr[len - 1] = '\0';
604
605 sarrayDestroy(&sa);
606 sarrayDestroy(&saout);
607 return cleanstr;
608 }
609
610
611 /*
612 * \brief skipToEndOfFunction()
613 *
614 * \param[in] sa output from cpp, by line
615 * \param[in] start index of starting line with left bracket to search
616 * \param[in] lbindex starting char index for left bracket
617 * \param[out] pnext index of line following the ending '}' for function
618 * \return 0 if OK, 1 on error
619 */
620 static l_int32
621 skipToEndOfFunction(SARRAY *sa,
622 l_int32 start,
623 l_int32 lbindex,
624 l_int32 *pnext)
625 {
626 l_int32 end, rbindex;
627 l_int32 soffsetlb, boffsetlb, toffsetlb;
628
629 if (!sa)
630 return ERROR_INT("sa not defined", __func__, 1);
631 if (!pnext)
632 return ERROR_INT("&next not defined", __func__, 1);
633
634 getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
635 &toffsetlb);
636 skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
637 if (end == -1) { /* shouldn't happen! */
638 *pnext = -1;
639 return 1;
640 }
641
642 *pnext = end + 1;
643 return 0;
644 }
645
646
647 /*
648 * \brief skipToMatchingBrace()
649 *
650 * \param[in] sa output from cpp, by line
651 * \param[in] start index of starting line with left bracket to search
652 * \param[in] lbindex starting char index for left bracket
653 * \param[out] pstop index of line with the matching right bracket
654 * \param[out] prbindex char index of matching right bracket
655 * \return 0 if OK, 1 on error
656 *
657 * <pre>
658 * Notes:
659 * (1) If the matching right brace is not found, returns
660 * stop = -1. This shouldn't happen.
661 * </pre>
662 */
663 static l_int32
664 skipToMatchingBrace(SARRAY *sa,
665 l_int32 start,
666 l_int32 lbindex,
667 l_int32 *pstop,
668 l_int32 *prbindex)
669 {
670 char *str;
671 l_int32 i, j, jstart, n, sumbrace, found, instring, nchars;
672
673 if (!sa)
674 return ERROR_INT("sa not defined", __func__, 1);
675 if (!pstop)
676 return ERROR_INT("&stop not defined", __func__, 1);
677 if (!prbindex)
678 return ERROR_INT("&rbindex not defined", __func__, 1);
679
680 instring = 0; /* init to FALSE; toggle on double quotes */
681 *pstop = -1;
682 n = sarrayGetCount(sa);
683 sumbrace = 1;
684 found = FALSE;
685 for (i = start; i < n; i++) {
686 str = sarrayGetString(sa, i, L_NOCOPY);
687 jstart = 0;
688 if (i == start)
689 jstart = lbindex + 1;
690 nchars = strlen(str);
691 for (j = jstart; j < nchars; j++) {
692 /* Toggle the instring state every time you encounter
693 * a double quote that is NOT escaped. */
694 if (j == jstart && str[j] == '\"')
695 instring = 1 - instring;
696 if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
697 instring = 1 - instring;
698 /* Record the braces if they are neither a literal character
699 * nor within a string. */
700 if (str[j] == '{' && str[j+1] != '\'' && !instring) {
701 sumbrace++;
702 } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
703 sumbrace--;
704 if (sumbrace == 0) {
705 found = TRUE;
706 *prbindex = j;
707 break;
708 }
709 }
710 }
711 if (found) {
712 *pstop = i;
713 return 0;
714 }
715 }
716
717 return ERROR_INT("matching right brace not found", __func__, 1);
718 }
719
720
721 /*
722 * \brief skipToSemicolon()
723 *
724 * \param[in] sa output from cpp, by line
725 * \param[in] start index of starting line to search
726 * \param[in] charindex starting char index for search
727 * \param[out] pnext index of line containing the next ';'
728 * \return 0 if OK, 1 on error
729 *
730 * <pre>
731 * Notes:
732 * (1) If the semicolon isn't found, returns next = -1.
733 * This shouldn't happen.
734 * (2) This is only used in contexts where the semicolon is
735 * not within a string.
736 * </pre>
737 */
738 static l_int32
739 skipToSemicolon(SARRAY *sa,
740 l_int32 start,
741 l_int32 charindex,
742 l_int32 *pnext)
743 {
744 char *str;
745 l_int32 i, j, n, jstart, nchars, found;
746
747 if (!sa)
748 return ERROR_INT("sa not defined", __func__, 1);
749 if (!pnext)
750 return ERROR_INT("&next not defined", __func__, 1);
751
752 *pnext = -1;
753 n = sarrayGetCount(sa);
754 found = FALSE;
755 for (i = start; i < n; i++) {
756 str = sarrayGetString(sa, i, L_NOCOPY);
757 jstart = 0;
758 if (i == start)
759 jstart = charindex + 1;
760 nchars = strlen(str);
761 for (j = jstart; j < nchars; j++) {
762 if (str[j] == ';') {
763 found = TRUE;;
764 break;
765 }
766 }
767 if (found) {
768 *pnext = i;
769 return 0;
770 }
771 }
772
773 return ERROR_INT("semicolon not found", __func__, 1);
774 }
775
776
777 /*
778 * \brief getOffsetForCharacter()
779 *
780 * \param[in] sa output from cpp, by line
781 * \param[in] start starting index in sa to search;
782 * never a comment line
783 * \param[in] tchar we are searching for the first instance of this
784 * \param[out] psoffset offset in strings from start index
785 * \param[out] pboffset offset in bytes within string in which
786 * the character is first found
787 * \param[out] ptoffset offset in total bytes from beginning of string
788 * indexed by 'start' to the location where
789 * the character is first found
790 * \return 0 if OK, 1 on error
791 *
792 * <pre>
793 * Notes:
794 * (1) We are searching for the first instance of 'tchar', starting
795 * at the beginning of the string indexed by start.
796 * (2) If the character is not found, soffset is returned as -1,
797 * and the other offsets are set to very large numbers. The
798 * caller must check the value of soffset.
799 * (3) This is only used in contexts where it is not necessary to
800 * consider if the character is inside a string.
801 * </pre>
802 */
803 static l_int32
804 getOffsetForCharacter(SARRAY *sa,
805 l_int32 start,
806 char tchar,
807 l_int32 *psoffset,
808 l_int32 *pboffset,
809 l_int32 *ptoffset)
810 {
811 char *str;
812 l_int32 i, j, n, nchars, totchars, found;
813
814 if (!sa)
815 return ERROR_INT("sa not defined", __func__, 1);
816 if (!psoffset)
817 return ERROR_INT("&soffset not defined", __func__, 1);
818 if (!pboffset)
819 return ERROR_INT("&boffset not defined", __func__, 1);
820 if (!ptoffset)
821 return ERROR_INT("&toffset not defined", __func__, 1);
822
823 *psoffset = -1; /* init to not found */
824 *pboffset = 100000000;
825 *ptoffset = 100000000;
826
827 n = sarrayGetCount(sa);
828 found = FALSE;
829 totchars = 0;
830 for (i = start; i < n; i++) {
831 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
832 return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
833 nchars = strlen(str);
834 for (j = 0; j < nchars; j++) {
835 if (str[j] == tchar) {
836 found = TRUE;
837 break;
838 }
839 }
840 if (found)
841 break;
842 totchars += nchars;
843 }
844
845 if (found) {
846 *psoffset = i - start;
847 *pboffset = j;
848 *ptoffset = totchars + j;
849 }
850
851 return 0;
852 }
853
854
855 /*
856 * \brief getOffsetForMatchingRP()
857 *
858 * \param[in] sa output from cpp, by line
859 * \param[in] start starting index in sa to search;
860 * never a comment line
861 * \param[in] soffsetlp string offset to first LP
862 * \param[in] boffsetlp byte offset within string to first LP
863 * \param[in] toffsetlp total byte offset to first LP
864 * \param[out] psoffset offset in strings from start index
865 * \param[out] pboffset offset in bytes within string in which
866 * the matching RP is found
867 * \param[out] ptoffset offset in total bytes from beginning of string
868 * indexed by 'start' to the location where
869 * the matching RP is found
870 * \return 0 if OK, 1 on error
871 *
872 * <pre>
873 * Notes:
874 * (1) We are searching for the matching right parenthesis (RP) that
875 * corresponds to the first LP found beginning at the string
876 * indexed by start.
877 * (2) If the matching RP is not found, soffset is returned as -1,
878 * and the other offsets are set to very large numbers. The
879 * caller must check the value of soffset.
880 * (3) This is only used in contexts where it is not necessary to
881 * consider if the character is inside a string.
882 * (4) We must do this because although most arg lists have a single
883 * left and right parenthesis, it is possible to construct
884 * more complicated prototype declarations, such as those
885 * where functions are passed in. The C++ rules for prototypes
886 * are strict, and require that for functions passed in as args,
887 * the function name arg be placed in parenthesis, as well
888 * as its arg list, thus incurring two extra levels of parentheses.
889 * </pre>
890 */
891 static l_int32
892 getOffsetForMatchingRP(SARRAY *sa,
893 l_int32 start,
894 l_int32 soffsetlp,
895 l_int32 boffsetlp,
896 l_int32 toffsetlp,
897 l_int32 *psoffset,
898 l_int32 *pboffset,
899 l_int32 *ptoffset)
900 {
901 char *str;
902 l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
903
904 if (!sa)
905 return ERROR_INT("sa not defined", __func__, 1);
906 if (!psoffset)
907 return ERROR_INT("&soffset not defined", __func__, 1);
908 if (!pboffset)
909 return ERROR_INT("&boffset not defined", __func__, 1);
910 if (!ptoffset)
911 return ERROR_INT("&toffset not defined", __func__, 1);
912
913 *psoffset = -1; /* init to not found */
914 *pboffset = 100000000;
915 *ptoffset = 100000000;
916
917 n = sarrayGetCount(sa);
918 found = FALSE;
919 totchars = toffsetlp;
920 leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */
921 firstline = start + soffsetlp;
922 for (i = firstline; i < n; i++) {
923 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
924 return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
925 nchars = strlen(str);
926 jstart = 0;
927 if (i == firstline)
928 jstart = boffsetlp + 1;
929 for (j = jstart; j < nchars; j++) {
930 if (str[j] == '(')
931 leftmatch++;
932 else if (str[j] == ')')
933 leftmatch--;
934 if (leftmatch == 0) {
935 found = TRUE;
936 break;
937 }
938 }
939 if (found)
940 break;
941 if (i == firstline)
942 totchars += nchars - boffsetlp;
943 else
944 totchars += nchars;
945 }
946
947 if (found) {
948 *psoffset = i - start;
949 *pboffset = j;
950 *ptoffset = totchars + j;
951 }
952
953 return 0;
954 }