Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/parseprotos.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /* | |
| 28 * \file parseprotos.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * char *parseForProtos() | |
| 32 * | |
| 33 * Static helpers | |
| 34 * static l_int32 getNextNonCommentLine() | |
| 35 * static l_int32 getNextNonBlankLine() | |
| 36 * static l_int32 getNextNonDoubleSlashLine() | |
| 37 * static l_int32 searchForProtoSignature() | |
| 38 * static char *captureProtoSignature() | |
| 39 * static char *cleanProtoSignature() | |
| 40 * static l_int32 skipToEndOfFunction() | |
| 41 * static l_int32 skipToMatchingBrace() | |
| 42 * static l_int32 skipToSemicolon() | |
| 43 * static l_int32 getOffsetForCharacter() | |
| 44 * static l_int32 getOffsetForMatchingRP() | |
| 45 * </pre> | |
| 46 */ | |
| 47 | |
| 48 #ifdef HAVE_CONFIG_H | |
| 49 #include <config_auto.h> | |
| 50 #endif /* HAVE_CONFIG_H */ | |
| 51 | |
| 52 #include <string.h> | |
| 53 #include "allheaders.h" | |
| 54 | |
| 55 #define L_BUF_SIZE 2048 /* max token size */ | |
| 56 | |
| 57 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext); | |
| 58 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext); | |
| 59 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start, | |
| 60 l_int32 *pnext); | |
| 61 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin, | |
| 62 l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex, | |
| 63 l_int32 *pfound); | |
| 64 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop, | |
| 65 l_int32 charindex); | |
| 66 static char * cleanProtoSignature(char *str); | |
| 67 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start, | |
| 68 l_int32 charindex, l_int32 *pnext); | |
| 69 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start, | |
| 70 l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex); | |
| 71 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start, | |
| 72 l_int32 charindex, l_int32 *pnext); | |
| 73 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar, | |
| 74 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset); | |
| 75 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start, | |
| 76 l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp, | |
| 77 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset); | |
| 78 | |
| 79 | |
| 80 /* | |
| 81 * \brief parseForProtos() | |
| 82 * | |
| 83 * \param[in] filein output of cpp | |
| 84 * \param[in] prestring [optional] string that prefaces each decl; | |
| 85 * use NULL to omit | |
| 86 * \return parsestr string of function prototypes, or NULL on error | |
| 87 * | |
| 88 * <pre> | |
| 89 * Notes: | |
| 90 * (1) We parse the output of cpp: | |
| 91 * cpp -ansi <filein> | |
| 92 * Three plans were attempted, with success on the third. | |
| 93 * (2) Plan 1. A cursory examination of the cpp output indicated that | |
| 94 * every function was preceded by a cpp comment statement. | |
| 95 * So we just need to look at statements beginning after comments. | |
| 96 * Unfortunately, this is NOT the case. Some functions start | |
| 97 * without cpp comment lines, typically when there are no | |
| 98 * comments in the source that immediately precede the function. | |
| 99 * (3) Plan 2. Consider the keywords in the language that start | |
| 100 * parts of the cpp file. Some, like 'enum', 'union' and | |
| 101 * 'struct', are followed after a while by '{', and eventually | |
| 102 * end with '}, plus an optional token and a final ';'. | |
| 103 * Others, like 'extern', 'static' and 'typedef', are never | |
| 104 * the beginnings of global function definitions. Function | |
| 105 * prototypes have one or more sets of '(' followed eventually | |
| 106 * by a ')', and end with ';'. But function definitions have | |
| 107 * tokens, followed by '(', more tokens, ')' and then | |
| 108 * immediately a '{'. We would generate a prototype from this | |
| 109 * by adding a ';' to all tokens up to the ')'. So we use | |
| 110 * these special tokens to decide what we are parsing. And | |
| 111 * whenever a function definition is found and the prototype | |
| 112 * extracted, we skip through the rest of the function | |
| 113 * past the corresponding '}'. This token ends a line, and | |
| 114 * is often on a line of its own. But as it turns out, | |
| 115 * the only keyword we need to consider is 'static'. | |
| 116 * (4) Plan 3. Consider the parentheses and braces for various | |
| 117 * declarations. A struct, enum, or union has a pair of | |
| 118 * braces followed by a semicolon. With the exception of an | |
| 119 * __attribute__ declaration for a struct, they cannot have parentheses | |
| 120 * before the left brace, but a struct can have lots of parentheses | |
| 121 * within the brace set. A function prototype has no braces. | |
| 122 * A function declaration can have sets of left and right | |
| 123 * parentheses, but these are followed by a left brace. | |
| 124 * So plan 3 looks at the way parentheses and braces are | |
| 125 * organized. Once the beginning of a function definition | |
| 126 * is found, the prototype is extracted and we search for | |
| 127 * the ending right brace. | |
| 128 * (5) To find the ending right brace, it is necessary to do some | |
| 129 * careful parsing. For example, in this file, we have | |
| 130 * left and right braces as characters, and these must not | |
| 131 * be counted. Somewhat more tricky, the file fhmtauto.c | |
| 132 * generates code, and includes a right brace in a string. | |
| 133 * So we must not include braces that are in strings. But how | |
| 134 * do we know if something is inside a string? Keep state, | |
| 135 * starting with not-inside, and every time you hit a double quote | |
| 136 * that is not escaped, toggle the condition. Any brace | |
| 137 * found in the state of being within a string is ignored. | |
| 138 * (6) When a prototype is extracted, it is put in a canonical | |
| 139 * form (i.e., cleaned up). Finally, we check that it is | |
| 140 * not static and save it. (If static, it is ignored). | |
| 141 * (7) The %prestring for unix is NULL; it is included here so that | |
| 142 * you can use Microsoft's declaration for importing or | |
| 143 * exporting to a dll. See environ.h for examples of use. | |
| 144 * Here, we set: %prestring = "LEPT_DLL ". Note in particular | |
| 145 * the space character that will separate 'LEPT_DLL' from | |
| 146 * the standard unix prototype that follows. | |
| 147 * </pre> | |
| 148 */ | |
| 149 char * | |
| 150 parseForProtos(const char *filein, | |
| 151 const char *prestring) | |
| 152 { | |
| 153 char *strdata, *str, *newstr, *parsestr, *secondword; | |
| 154 l_int32 start, next, stop, charindex, found; | |
| 155 size_t nbytes; | |
| 156 SARRAY *sa, *saout, *satest; | |
| 157 | |
| 158 if (!filein) | |
| 159 return (char *)ERROR_PTR("filein not defined", __func__, NULL); | |
| 160 | |
| 161 /* Read in the cpp output into memory, one string for each | |
| 162 * line in the file, omitting blank lines. */ | |
| 163 strdata = (char *)l_binaryRead(filein, &nbytes); | |
| 164 sa = sarrayCreateLinesFromString(strdata, 0); | |
| 165 | |
| 166 saout = sarrayCreate(0); | |
| 167 next = 0; | |
| 168 while (1) { /* repeat after each non-static prototype is extracted */ | |
| 169 searchForProtoSignature(sa, next, &start, &stop, &charindex, &found); | |
| 170 if (!found) | |
| 171 break; | |
| 172 /* lept_stderr(" start = %d, stop = %d, charindex = %d\n", | |
| 173 start, stop, charindex); */ | |
| 174 str = captureProtoSignature(sa, start, stop, charindex); | |
| 175 | |
| 176 /* Make sure that the signature found by cpp does not begin with | |
| 177 * static, extern or typedef. We get 'extern' declarations | |
| 178 * from header files, and with some versions of cpp running on | |
| 179 * #include <sys/stat.h> we get something of the form: | |
| 180 * extern ... (( ... )) ... ( ... ) { ... | |
| 181 * For this, the 1st '(' is the lp, the 2nd ')' is the rp, | |
| 182 * and there is a lot of garbage between the rp and the lp. | |
| 183 * It is easiest to simply reject any signature that starts | |
| 184 * with 'extern'. Note also that an 'extern' token has been | |
| 185 * prepended to each prototype, so the 'static' or | |
| 186 * 'extern' keywords we are looking for, if they exist, | |
| 187 * would be the second word. We also have a typedef in | |
| 188 * bmpio.c that has the form: | |
| 189 * typedef struct __attribute__((....)) { ...} ... ; | |
| 190 * This is avoided by blacklisting 'typedef' along with 'extern' | |
| 191 * and 'static'. */ | |
| 192 satest = sarrayCreateWordsFromString(str); | |
| 193 secondword = sarrayGetString(satest, 1, L_NOCOPY); | |
| 194 if (strcmp(secondword, "static") && /* not static */ | |
| 195 strcmp(secondword, "extern") && /* not extern */ | |
| 196 strcmp(secondword, "typedef")) { /* not typedef */ | |
| 197 if (prestring) { /* prepend it to the prototype */ | |
| 198 newstr = stringJoin(prestring, str); | |
| 199 sarrayAddString(saout, newstr, L_INSERT); | |
| 200 LEPT_FREE(str); | |
| 201 } else { | |
| 202 sarrayAddString(saout, str, L_INSERT); | |
| 203 } | |
| 204 } else { | |
| 205 LEPT_FREE(str); | |
| 206 } | |
| 207 sarrayDestroy(&satest); | |
| 208 | |
| 209 skipToEndOfFunction(sa, stop, charindex, &next); | |
| 210 if (next == -1) break; | |
| 211 } | |
| 212 | |
| 213 /* Flatten into a string with newlines between prototypes */ | |
| 214 parsestr = sarrayToString(saout, 1); | |
| 215 LEPT_FREE(strdata); | |
| 216 sarrayDestroy(&sa); | |
| 217 sarrayDestroy(&saout); | |
| 218 | |
| 219 return parsestr; | |
| 220 } | |
| 221 | |
| 222 | |
| 223 /* | |
| 224 * \brief getNextNonCommentLine() | |
| 225 * | |
| 226 * \param[in] sa output from cpp, by line) | |
| 227 * \param[in] start starting index to search) | |
| 228 * \param[out] pnext index of first uncommented line after the start line | |
| 229 * \return 0 if OK, o on error | |
| 230 * | |
| 231 * <pre> | |
| 232 * Notes: | |
| 233 * (1) Skips over all consecutive comment lines, beginning at 'start' | |
| 234 * (2) If all lines to the end are '#' comments, return next = -1 | |
| 235 * </pre> | |
| 236 */ | |
| 237 static l_int32 | |
| 238 getNextNonCommentLine(SARRAY *sa, | |
| 239 l_int32 start, | |
| 240 l_int32 *pnext) | |
| 241 { | |
| 242 char *str; | |
| 243 l_int32 i, n; | |
| 244 | |
| 245 if (!sa) | |
| 246 return ERROR_INT("sa not defined", __func__, 1); | |
| 247 if (!pnext) | |
| 248 return ERROR_INT("&pnext not defined", __func__, 1); | |
| 249 | |
| 250 /* Init for situation where this line and all following are comments */ | |
| 251 *pnext = -1; | |
| 252 | |
| 253 n = sarrayGetCount(sa); | |
| 254 for (i = start; i < n; i++) { | |
| 255 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) | |
| 256 return ERROR_INT("str not returned; shouldn't happen", __func__, 1); | |
| 257 if (str[0] != '#') { | |
| 258 *pnext = i; | |
| 259 return 0; | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 return 0; | |
| 264 } | |
| 265 | |
| 266 | |
| 267 /* | |
| 268 * \brief getNextNonBlankLine() | |
| 269 * | |
| 270 * \param[in] sa output from cpp, by line | |
| 271 * \param[in] start starting index to search | |
| 272 * \param[out] pnext index of first nonblank line after the start line | |
| 273 * \return 0 if OK, 1 on error | |
| 274 * | |
| 275 * <pre> | |
| 276 * Notes: | |
| 277 * (1) Skips over all consecutive blank lines, beginning at 'start' | |
| 278 * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r') | |
| 279 * (3) If all lines to the end are blank, return next = -1 | |
| 280 * </pre> | |
| 281 */ | |
| 282 static l_int32 | |
| 283 getNextNonBlankLine(SARRAY *sa, | |
| 284 l_int32 start, | |
| 285 l_int32 *pnext) | |
| 286 { | |
| 287 char *str; | |
| 288 l_int32 i, j, n, len; | |
| 289 | |
| 290 if (!sa) | |
| 291 return ERROR_INT("sa not defined", __func__, 1); | |
| 292 if (!pnext) | |
| 293 return ERROR_INT("&pnext not defined", __func__, 1); | |
| 294 | |
| 295 /* Init for situation where this line and all following are blank */ | |
| 296 *pnext = -1; | |
| 297 | |
| 298 n = sarrayGetCount(sa); | |
| 299 for (i = start; i < n; i++) { | |
| 300 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) | |
| 301 return ERROR_INT("str not returned; shouldn't happen", __func__, 1); | |
| 302 len = strlen(str); | |
| 303 for (j = 0; j < len; j++) { | |
| 304 if (str[j] != ' ' && str[j] != '\t' | |
| 305 && str[j] != '\n' && str[j] != '\r') { /* non-blank */ | |
| 306 *pnext = i; | |
| 307 return 0; | |
| 308 } | |
| 309 } | |
| 310 } | |
| 311 | |
| 312 return 0; | |
| 313 } | |
| 314 | |
| 315 | |
| 316 /* | |
| 317 * \brief getNextNonDoubleSlashLine() | |
| 318 * | |
| 319 * \param[in] sa output from cpp, by line | |
| 320 * \param[in] start starting index to search | |
| 321 * \param[out] pnext index of first uncommented line after the start line | |
| 322 * \return 0 if OK, 1 on error | |
| 323 * | |
| 324 * <pre> | |
| 325 * Notes: | |
| 326 * (1) Skips over all consecutive '//' lines, beginning at 'start' | |
| 327 * (2) If all lines to the end start with '//', return next = -1 | |
| 328 * </pre> | |
| 329 */ | |
| 330 static l_int32 | |
| 331 getNextNonDoubleSlashLine(SARRAY *sa, | |
| 332 l_int32 start, | |
| 333 l_int32 *pnext) | |
| 334 { | |
| 335 char *str; | |
| 336 l_int32 i, n, len; | |
| 337 | |
| 338 if (!sa) | |
| 339 return ERROR_INT("sa not defined", __func__, 1); | |
| 340 if (!pnext) | |
| 341 return ERROR_INT("&pnext not defined", __func__, 1); | |
| 342 | |
| 343 /* Init for situation where this line and all following | |
| 344 * start with '//' */ | |
| 345 *pnext = -1; | |
| 346 | |
| 347 n = sarrayGetCount(sa); | |
| 348 for (i = start; i < n; i++) { | |
| 349 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) | |
| 350 return ERROR_INT("str not returned; shouldn't happen", __func__, 1); | |
| 351 len = strlen(str); | |
| 352 if (len < 2 || str[0] != '/' || str[1] != '/') { | |
| 353 *pnext = i; | |
| 354 return 0; | |
| 355 } | |
| 356 } | |
| 357 | |
| 358 return 0; | |
| 359 } | |
| 360 | |
| 361 | |
| 362 /* | |
| 363 * \brief searchForProtoSignature() | |
| 364 * | |
| 365 * \param[in] sa output from cpp, by line | |
| 366 * \param[in] begin beginning index to search | |
| 367 * \param[out] pstart starting index for function definition | |
| 368 * \param[out] pstop index of line on which proto is completed | |
| 369 * \param[out] pcharindex char index of completing ')' character | |
| 370 * \param[out] pfound 1 if valid signature is found; 0 otherwise | |
| 371 * \return 0 if OK, 1 on error | |
| 372 * | |
| 373 * <pre> | |
| 374 * Notes: | |
| 375 * (1) If this returns found == 0, it means that there are no | |
| 376 * more function definitions in the file. Caller must check | |
| 377 * this value and exit the loop over the entire cpp file. | |
| 378 * (2) This follows plan 3 (see above). We skip comment and blank | |
| 379 * lines at the beginning. Then we don't check for keywords. | |
| 380 * Instead, find the relative locations of the first occurrences | |
| 381 * of these four tokens: left parenthesis (lp), right | |
| 382 * parenthesis (rp), left brace (lb) and semicolon (sc). | |
| 383 * (3) The signature of a function definition looks like this: | |
| 384 * .... '(' .... ')' '{' | |
| 385 * where the lp and rp must both precede the lb, with only | |
| 386 * whitespace between the rp and the lb. The '....' | |
| 387 * are sets of tokens that have no braces. | |
| 388 * (4) If a function definition is found, this returns found = 1, | |
| 389 * with 'start' being the first line of the definition and | |
| 390 * 'charindex' being the position of the ')' in line 'stop' | |
| 391 * at the end of the arg list. | |
| 392 * </pre> | |
| 393 */ | |
| 394 static l_int32 | |
| 395 searchForProtoSignature(SARRAY *sa, | |
| 396 l_int32 begin, | |
| 397 l_int32 *pstart, | |
| 398 l_int32 *pstop, | |
| 399 l_int32 *pcharindex, | |
| 400 l_int32 *pfound) | |
| 401 { | |
| 402 l_int32 next, rbline, rbindex, scline; | |
| 403 l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc; | |
| 404 l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc; | |
| 405 l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc; | |
| 406 | |
| 407 if (!sa) | |
| 408 return ERROR_INT("sa not defined", __func__, 1); | |
| 409 if (!pstart) | |
| 410 return ERROR_INT("&start not defined", __func__, 1); | |
| 411 if (!pstop) | |
| 412 return ERROR_INT("&stop not defined", __func__, 1); | |
| 413 if (!pcharindex) | |
| 414 return ERROR_INT("&charindex not defined", __func__, 1); | |
| 415 if (!pfound) | |
| 416 return ERROR_INT("&found not defined", __func__, 1); | |
| 417 | |
| 418 *pfound = FALSE; | |
| 419 | |
| 420 while (1) { | |
| 421 | |
| 422 /* Skip over sequential '#' comment lines */ | |
| 423 getNextNonCommentLine(sa, begin, &next); | |
| 424 if (next == -1) return 0; | |
| 425 if (next != begin) { | |
| 426 begin = next; | |
| 427 continue; | |
| 428 } | |
| 429 | |
| 430 /* Skip over sequential blank lines */ | |
| 431 getNextNonBlankLine(sa, begin, &next); | |
| 432 if (next == -1) return 0; | |
| 433 if (next != begin) { | |
| 434 begin = next; | |
| 435 continue; | |
| 436 } | |
| 437 | |
| 438 /* Skip over sequential lines starting with '//' */ | |
| 439 getNextNonDoubleSlashLine(sa, begin, &next); | |
| 440 if (next == -1) return 0; | |
| 441 if (next != begin) { | |
| 442 begin = next; | |
| 443 continue; | |
| 444 } | |
| 445 | |
| 446 /* Search for specific character sequence patterns; namely | |
| 447 * a lp, a matching rp, a lb and a semicolon. | |
| 448 * Abort the search if no lp is found. */ | |
| 449 getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp, | |
| 450 &toffsetlp); | |
| 451 if (soffsetlp == -1) | |
| 452 break; | |
| 453 getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp, | |
| 454 &soffsetrp, &boffsetrp, &toffsetrp); | |
| 455 getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb, | |
| 456 &toffsetlb); | |
| 457 getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc, | |
| 458 &toffsetsc); | |
| 459 | |
| 460 /* We've found a lp. Now weed out the case where a matching | |
| 461 * rp and a lb are not both found. */ | |
| 462 if (soffsetrp == -1 || soffsetlb == -1) | |
| 463 break; | |
| 464 | |
| 465 /* Check if a left brace occurs before a left parenthesis; | |
| 466 * if so, skip it */ | |
| 467 if (toffsetlb < toffsetlp) { | |
| 468 skipToMatchingBrace(sa, next + soffsetlb, boffsetlb, | |
| 469 &rbline, &rbindex); | |
| 470 skipToSemicolon(sa, rbline, rbindex, &scline); | |
| 471 begin = scline + 1; | |
| 472 continue; | |
| 473 } | |
| 474 | |
| 475 /* Check if a semicolon occurs before a left brace or | |
| 476 * a left parenthesis; if so, skip it */ | |
| 477 if ((soffsetsc != -1) && | |
| 478 (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) { | |
| 479 skipToSemicolon(sa, next, 0, &scline); | |
| 480 begin = scline + 1; | |
| 481 continue; | |
| 482 } | |
| 483 | |
| 484 /* OK, it should be a function definition. We haven't | |
| 485 * checked that there is only white space between the | |
| 486 * rp and lb, but we've only seen problems with two | |
| 487 * extern inlines in sys/stat.h, and this is handled | |
| 488 * later by eliminating any prototype beginning with 'extern'. */ | |
| 489 *pstart = next; | |
| 490 *pstop = next + soffsetrp; | |
| 491 *pcharindex = boffsetrp; | |
| 492 *pfound = TRUE; | |
| 493 break; | |
| 494 } | |
| 495 | |
| 496 return 0; | |
| 497 } | |
| 498 | |
| 499 | |
| 500 /* | |
| 501 * \brief captureProtoSignature() | |
| 502 * | |
| 503 * \param[in] sa output from cpp, by line | |
| 504 * \param[in] start starting index to search; never a comment line | |
| 505 * \param[in] stop index of line on which pattern is completed | |
| 506 * \param[in] charindex char index of completing ')' character | |
| 507 * \return cleanstr prototype string, or NULL on error | |
| 508 * | |
| 509 * <pre> | |
| 510 * Notes: | |
| 511 * (1) Return all characters, ending with a ';' after the ')' | |
| 512 * </pre> | |
| 513 */ | |
| 514 static char * | |
| 515 captureProtoSignature(SARRAY *sa, | |
| 516 l_int32 start, | |
| 517 l_int32 stop, | |
| 518 l_int32 charindex) | |
| 519 { | |
| 520 char *str, *newstr, *protostr, *cleanstr; | |
| 521 SARRAY *sap; | |
| 522 l_int32 i; | |
| 523 | |
| 524 if (!sa) | |
| 525 return (char *)ERROR_PTR("sa not defined", __func__, NULL); | |
| 526 | |
| 527 sap = sarrayCreate(0); | |
| 528 for (i = start; i < stop; i++) { | |
| 529 str = sarrayGetString(sa, i, L_COPY); | |
| 530 sarrayAddString(sap, str, L_INSERT); | |
| 531 } | |
| 532 str = sarrayGetString(sa, stop, L_COPY); | |
| 533 str[charindex + 1] = '\0'; | |
| 534 newstr = stringJoin(str, ";"); | |
| 535 sarrayAddString(sap, newstr, L_INSERT); | |
| 536 LEPT_FREE(str); | |
| 537 protostr = sarrayToString(sap, 2); | |
| 538 sarrayDestroy(&sap); | |
| 539 cleanstr = cleanProtoSignature(protostr); | |
| 540 LEPT_FREE(protostr); | |
| 541 | |
| 542 return cleanstr; | |
| 543 } | |
| 544 | |
| 545 | |
| 546 /* | |
| 547 * \brief cleanProtoSignature() | |
| 548 * | |
| 549 * \param[in] instr input prototype string | |
| 550 * \return cleanstr clean prototype string, or NULL on error | |
| 551 * | |
| 552 * <pre> | |
| 553 * Notes: | |
| 554 * (1) Adds 'extern' at beginning and regularizes spaces | |
| 555 * between tokens. | |
| 556 * </pre> | |
| 557 */ | |
| 558 static char * | |
| 559 cleanProtoSignature(char *instr) | |
| 560 { | |
| 561 char *str, *cleanstr; | |
| 562 char buf[L_BUF_SIZE]; | |
| 563 char externstring[] = "extern"; | |
| 564 l_int32 i, j, nwords, nchars, index, len; | |
| 565 SARRAY *sa, *saout; | |
| 566 | |
| 567 if (!instr) | |
| 568 return (char *)ERROR_PTR("instr not defined", __func__, NULL); | |
| 569 | |
| 570 sa = sarrayCreateWordsFromString(instr); | |
| 571 nwords = sarrayGetCount(sa); | |
| 572 saout = sarrayCreate(0); | |
| 573 sarrayAddString(saout, externstring, L_COPY); | |
| 574 for (i = 0; i < nwords; i++) { | |
| 575 str = sarrayGetString(sa, i, L_NOCOPY); | |
| 576 nchars = strlen(str); | |
| 577 index = 0; | |
| 578 for (j = 0; j < nchars; j++) { | |
| 579 if (index > L_BUF_SIZE - 6) { | |
| 580 sarrayDestroy(&sa); | |
| 581 sarrayDestroy(&saout); | |
| 582 return (char *)ERROR_PTR("token too large", __func__, NULL); | |
| 583 } | |
| 584 if (str[j] == '(') { | |
| 585 buf[index++] = ' '; | |
| 586 buf[index++] = '('; | |
| 587 buf[index++] = ' '; | |
| 588 } else if (str[j] == ')') { | |
| 589 buf[index++] = ' '; | |
| 590 buf[index++] = ')'; | |
| 591 } else { | |
| 592 buf[index++] = str[j]; | |
| 593 } | |
| 594 } | |
| 595 buf[index] = '\0'; | |
| 596 sarrayAddString(saout, buf, L_COPY); | |
| 597 } | |
| 598 | |
| 599 /* Flatten to a prototype string with spaces added after | |
| 600 * each word, and remove the last space */ | |
| 601 cleanstr = sarrayToString(saout, 2); | |
| 602 len = strlen(cleanstr); | |
| 603 cleanstr[len - 1] = '\0'; | |
| 604 | |
| 605 sarrayDestroy(&sa); | |
| 606 sarrayDestroy(&saout); | |
| 607 return cleanstr; | |
| 608 } | |
| 609 | |
| 610 | |
| 611 /* | |
| 612 * \brief skipToEndOfFunction() | |
| 613 * | |
| 614 * \param[in] sa output from cpp, by line | |
| 615 * \param[in] start index of starting line with left bracket to search | |
| 616 * \param[in] lbindex starting char index for left bracket | |
| 617 * \param[out] pnext index of line following the ending '}' for function | |
| 618 * \return 0 if OK, 1 on error | |
| 619 */ | |
| 620 static l_int32 | |
| 621 skipToEndOfFunction(SARRAY *sa, | |
| 622 l_int32 start, | |
| 623 l_int32 lbindex, | |
| 624 l_int32 *pnext) | |
| 625 { | |
| 626 l_int32 end, rbindex; | |
| 627 l_int32 soffsetlb, boffsetlb, toffsetlb; | |
| 628 | |
| 629 if (!sa) | |
| 630 return ERROR_INT("sa not defined", __func__, 1); | |
| 631 if (!pnext) | |
| 632 return ERROR_INT("&next not defined", __func__, 1); | |
| 633 | |
| 634 getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb, | |
| 635 &toffsetlb); | |
| 636 skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex); | |
| 637 if (end == -1) { /* shouldn't happen! */ | |
| 638 *pnext = -1; | |
| 639 return 1; | |
| 640 } | |
| 641 | |
| 642 *pnext = end + 1; | |
| 643 return 0; | |
| 644 } | |
| 645 | |
| 646 | |
| 647 /* | |
| 648 * \brief skipToMatchingBrace() | |
| 649 * | |
| 650 * \param[in] sa output from cpp, by line | |
| 651 * \param[in] start index of starting line with left bracket to search | |
| 652 * \param[in] lbindex starting char index for left bracket | |
| 653 * \param[out] pstop index of line with the matching right bracket | |
| 654 * \param[out] prbindex char index of matching right bracket | |
| 655 * \return 0 if OK, 1 on error | |
| 656 * | |
| 657 * <pre> | |
| 658 * Notes: | |
| 659 * (1) If the matching right brace is not found, returns | |
| 660 * stop = -1. This shouldn't happen. | |
| 661 * </pre> | |
| 662 */ | |
| 663 static l_int32 | |
| 664 skipToMatchingBrace(SARRAY *sa, | |
| 665 l_int32 start, | |
| 666 l_int32 lbindex, | |
| 667 l_int32 *pstop, | |
| 668 l_int32 *prbindex) | |
| 669 { | |
| 670 char *str; | |
| 671 l_int32 i, j, jstart, n, sumbrace, found, instring, nchars; | |
| 672 | |
| 673 if (!sa) | |
| 674 return ERROR_INT("sa not defined", __func__, 1); | |
| 675 if (!pstop) | |
| 676 return ERROR_INT("&stop not defined", __func__, 1); | |
| 677 if (!prbindex) | |
| 678 return ERROR_INT("&rbindex not defined", __func__, 1); | |
| 679 | |
| 680 instring = 0; /* init to FALSE; toggle on double quotes */ | |
| 681 *pstop = -1; | |
| 682 n = sarrayGetCount(sa); | |
| 683 sumbrace = 1; | |
| 684 found = FALSE; | |
| 685 for (i = start; i < n; i++) { | |
| 686 str = sarrayGetString(sa, i, L_NOCOPY); | |
| 687 jstart = 0; | |
| 688 if (i == start) | |
| 689 jstart = lbindex + 1; | |
| 690 nchars = strlen(str); | |
| 691 for (j = jstart; j < nchars; j++) { | |
| 692 /* Toggle the instring state every time you encounter | |
| 693 * a double quote that is NOT escaped. */ | |
| 694 if (j == jstart && str[j] == '\"') | |
| 695 instring = 1 - instring; | |
| 696 if (j > jstart && str[j] == '\"' && str[j-1] != '\\') | |
| 697 instring = 1 - instring; | |
| 698 /* Record the braces if they are neither a literal character | |
| 699 * nor within a string. */ | |
| 700 if (str[j] == '{' && str[j+1] != '\'' && !instring) { | |
| 701 sumbrace++; | |
| 702 } else if (str[j] == '}' && str[j+1] != '\'' && !instring) { | |
| 703 sumbrace--; | |
| 704 if (sumbrace == 0) { | |
| 705 found = TRUE; | |
| 706 *prbindex = j; | |
| 707 break; | |
| 708 } | |
| 709 } | |
| 710 } | |
| 711 if (found) { | |
| 712 *pstop = i; | |
| 713 return 0; | |
| 714 } | |
| 715 } | |
| 716 | |
| 717 return ERROR_INT("matching right brace not found", __func__, 1); | |
| 718 } | |
| 719 | |
| 720 | |
| 721 /* | |
| 722 * \brief skipToSemicolon() | |
| 723 * | |
| 724 * \param[in] sa output from cpp, by line | |
| 725 * \param[in] start index of starting line to search | |
| 726 * \param[in] charindex starting char index for search | |
| 727 * \param[out] pnext index of line containing the next ';' | |
| 728 * \return 0 if OK, 1 on error | |
| 729 * | |
| 730 * <pre> | |
| 731 * Notes: | |
| 732 * (1) If the semicolon isn't found, returns next = -1. | |
| 733 * This shouldn't happen. | |
| 734 * (2) This is only used in contexts where the semicolon is | |
| 735 * not within a string. | |
| 736 * </pre> | |
| 737 */ | |
| 738 static l_int32 | |
| 739 skipToSemicolon(SARRAY *sa, | |
| 740 l_int32 start, | |
| 741 l_int32 charindex, | |
| 742 l_int32 *pnext) | |
| 743 { | |
| 744 char *str; | |
| 745 l_int32 i, j, n, jstart, nchars, found; | |
| 746 | |
| 747 if (!sa) | |
| 748 return ERROR_INT("sa not defined", __func__, 1); | |
| 749 if (!pnext) | |
| 750 return ERROR_INT("&next not defined", __func__, 1); | |
| 751 | |
| 752 *pnext = -1; | |
| 753 n = sarrayGetCount(sa); | |
| 754 found = FALSE; | |
| 755 for (i = start; i < n; i++) { | |
| 756 str = sarrayGetString(sa, i, L_NOCOPY); | |
| 757 jstart = 0; | |
| 758 if (i == start) | |
| 759 jstart = charindex + 1; | |
| 760 nchars = strlen(str); | |
| 761 for (j = jstart; j < nchars; j++) { | |
| 762 if (str[j] == ';') { | |
| 763 found = TRUE;; | |
| 764 break; | |
| 765 } | |
| 766 } | |
| 767 if (found) { | |
| 768 *pnext = i; | |
| 769 return 0; | |
| 770 } | |
| 771 } | |
| 772 | |
| 773 return ERROR_INT("semicolon not found", __func__, 1); | |
| 774 } | |
| 775 | |
| 776 | |
| 777 /* | |
| 778 * \brief getOffsetForCharacter() | |
| 779 * | |
| 780 * \param[in] sa output from cpp, by line | |
| 781 * \param[in] start starting index in sa to search; | |
| 782 * never a comment line | |
| 783 * \param[in] tchar we are searching for the first instance of this | |
| 784 * \param[out] psoffset offset in strings from start index | |
| 785 * \param[out] pboffset offset in bytes within string in which | |
| 786 * the character is first found | |
| 787 * \param[out] ptoffset offset in total bytes from beginning of string | |
| 788 * indexed by 'start' to the location where | |
| 789 * the character is first found | |
| 790 * \return 0 if OK, 1 on error | |
| 791 * | |
| 792 * <pre> | |
| 793 * Notes: | |
| 794 * (1) We are searching for the first instance of 'tchar', starting | |
| 795 * at the beginning of the string indexed by start. | |
| 796 * (2) If the character is not found, soffset is returned as -1, | |
| 797 * and the other offsets are set to very large numbers. The | |
| 798 * caller must check the value of soffset. | |
| 799 * (3) This is only used in contexts where it is not necessary to | |
| 800 * consider if the character is inside a string. | |
| 801 * </pre> | |
| 802 */ | |
| 803 static l_int32 | |
| 804 getOffsetForCharacter(SARRAY *sa, | |
| 805 l_int32 start, | |
| 806 char tchar, | |
| 807 l_int32 *psoffset, | |
| 808 l_int32 *pboffset, | |
| 809 l_int32 *ptoffset) | |
| 810 { | |
| 811 char *str; | |
| 812 l_int32 i, j, n, nchars, totchars, found; | |
| 813 | |
| 814 if (!sa) | |
| 815 return ERROR_INT("sa not defined", __func__, 1); | |
| 816 if (!psoffset) | |
| 817 return ERROR_INT("&soffset not defined", __func__, 1); | |
| 818 if (!pboffset) | |
| 819 return ERROR_INT("&boffset not defined", __func__, 1); | |
| 820 if (!ptoffset) | |
| 821 return ERROR_INT("&toffset not defined", __func__, 1); | |
| 822 | |
| 823 *psoffset = -1; /* init to not found */ | |
| 824 *pboffset = 100000000; | |
| 825 *ptoffset = 100000000; | |
| 826 | |
| 827 n = sarrayGetCount(sa); | |
| 828 found = FALSE; | |
| 829 totchars = 0; | |
| 830 for (i = start; i < n; i++) { | |
| 831 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) | |
| 832 return ERROR_INT("str not returned; shouldn't happen", __func__, 1); | |
| 833 nchars = strlen(str); | |
| 834 for (j = 0; j < nchars; j++) { | |
| 835 if (str[j] == tchar) { | |
| 836 found = TRUE; | |
| 837 break; | |
| 838 } | |
| 839 } | |
| 840 if (found) | |
| 841 break; | |
| 842 totchars += nchars; | |
| 843 } | |
| 844 | |
| 845 if (found) { | |
| 846 *psoffset = i - start; | |
| 847 *pboffset = j; | |
| 848 *ptoffset = totchars + j; | |
| 849 } | |
| 850 | |
| 851 return 0; | |
| 852 } | |
| 853 | |
| 854 | |
| 855 /* | |
| 856 * \brief getOffsetForMatchingRP() | |
| 857 * | |
| 858 * \param[in] sa output from cpp, by line | |
| 859 * \param[in] start starting index in sa to search; | |
| 860 * never a comment line | |
| 861 * \param[in] soffsetlp string offset to first LP | |
| 862 * \param[in] boffsetlp byte offset within string to first LP | |
| 863 * \param[in] toffsetlp total byte offset to first LP | |
| 864 * \param[out] psoffset offset in strings from start index | |
| 865 * \param[out] pboffset offset in bytes within string in which | |
| 866 * the matching RP is found | |
| 867 * \param[out] ptoffset offset in total bytes from beginning of string | |
| 868 * indexed by 'start' to the location where | |
| 869 * the matching RP is found | |
| 870 * \return 0 if OK, 1 on error | |
| 871 * | |
| 872 * <pre> | |
| 873 * Notes: | |
| 874 * (1) We are searching for the matching right parenthesis (RP) that | |
| 875 * corresponds to the first LP found beginning at the string | |
| 876 * indexed by start. | |
| 877 * (2) If the matching RP is not found, soffset is returned as -1, | |
| 878 * and the other offsets are set to very large numbers. The | |
| 879 * caller must check the value of soffset. | |
| 880 * (3) This is only used in contexts where it is not necessary to | |
| 881 * consider if the character is inside a string. | |
| 882 * (4) We must do this because although most arg lists have a single | |
| 883 * left and right parenthesis, it is possible to construct | |
| 884 * more complicated prototype declarations, such as those | |
| 885 * where functions are passed in. The C++ rules for prototypes | |
| 886 * are strict, and require that for functions passed in as args, | |
| 887 * the function name arg be placed in parenthesis, as well | |
| 888 * as its arg list, thus incurring two extra levels of parentheses. | |
| 889 * </pre> | |
| 890 */ | |
| 891 static l_int32 | |
| 892 getOffsetForMatchingRP(SARRAY *sa, | |
| 893 l_int32 start, | |
| 894 l_int32 soffsetlp, | |
| 895 l_int32 boffsetlp, | |
| 896 l_int32 toffsetlp, | |
| 897 l_int32 *psoffset, | |
| 898 l_int32 *pboffset, | |
| 899 l_int32 *ptoffset) | |
| 900 { | |
| 901 char *str; | |
| 902 l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found; | |
| 903 | |
| 904 if (!sa) | |
| 905 return ERROR_INT("sa not defined", __func__, 1); | |
| 906 if (!psoffset) | |
| 907 return ERROR_INT("&soffset not defined", __func__, 1); | |
| 908 if (!pboffset) | |
| 909 return ERROR_INT("&boffset not defined", __func__, 1); | |
| 910 if (!ptoffset) | |
| 911 return ERROR_INT("&toffset not defined", __func__, 1); | |
| 912 | |
| 913 *psoffset = -1; /* init to not found */ | |
| 914 *pboffset = 100000000; | |
| 915 *ptoffset = 100000000; | |
| 916 | |
| 917 n = sarrayGetCount(sa); | |
| 918 found = FALSE; | |
| 919 totchars = toffsetlp; | |
| 920 leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */ | |
| 921 firstline = start + soffsetlp; | |
| 922 for (i = firstline; i < n; i++) { | |
| 923 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL) | |
| 924 return ERROR_INT("str not returned; shouldn't happen", __func__, 1); | |
| 925 nchars = strlen(str); | |
| 926 jstart = 0; | |
| 927 if (i == firstline) | |
| 928 jstart = boffsetlp + 1; | |
| 929 for (j = jstart; j < nchars; j++) { | |
| 930 if (str[j] == '(') | |
| 931 leftmatch++; | |
| 932 else if (str[j] == ')') | |
| 933 leftmatch--; | |
| 934 if (leftmatch == 0) { | |
| 935 found = TRUE; | |
| 936 break; | |
| 937 } | |
| 938 } | |
| 939 if (found) | |
| 940 break; | |
| 941 if (i == firstline) | |
| 942 totchars += nchars - boffsetlp; | |
| 943 else | |
| 944 totchars += nchars; | |
| 945 } | |
| 946 | |
| 947 if (found) { | |
| 948 *psoffset = i - start; | |
| 949 *pboffset = j; | |
| 950 *ptoffset = totchars + j; | |
| 951 } | |
| 952 | |
| 953 return 0; | |
| 954 } |
