Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/bmf.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file bmf.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Acquisition and generation of bitmap fonts. | |
| 32 * | |
| 33 * L_BMF *bmfCreate() | |
| 34 * void bmfDestroy() | |
| 35 * | |
| 36 * PIX *bmfGetPix() | |
| 37 * l_int32 bmfGetWidth() | |
| 38 * l_int32 bmfGetBaseline() | |
| 39 * | |
| 40 * PIXA *pixaGetFont() | |
| 41 * l_int32 pixaSaveFont() | |
| 42 * static PIXA *pixaGenerateFontFromFile() | |
| 43 * static PIXA *pixaGenerateFontFromString() | |
| 44 * static PIXA *pixaGenerateFont() | |
| 45 * static l_int32 pixGetTextBaseline() | |
| 46 * static l_int32 bmfMakeAsciiTables() | |
| 47 * | |
| 48 * This is not a very general utility, because it only uses bitmap | |
| 49 * representations of a single font, Palatino-Roman, with the | |
| 50 * normal style. It uses bitmaps generated for nine sizes, from | |
| 51 * 4 to 20 pts, rendered at 300 ppi. Generalization to different | |
| 52 * fonts, styles and sizes is straightforward. | |
| 53 * | |
| 54 * I chose Palatino-Roman because I like it. | |
| 55 * The input font images were generated from a set of small | |
| 56 * PostScript files, such as chars-12.ps, which were rendered | |
| 57 * into the inputfont[] bitmap files using GhostScript. See, for | |
| 58 * example, the bash script prog/ps2tiff, which will "rip" a | |
| 59 * PostScript file into a set of ccitt-g4 compressed tiff files. | |
| 60 * | |
| 61 * The set of ascii characters from 32 through 126 are the 95 | |
| 62 * printable ascii chars. Palatino-Roman is missing char 92, '\'. | |
| 63 * I have substituted an LR flip of '/', char 47, for 92, so that | |
| 64 * there are no missing printable chars in this set. The space is | |
| 65 * char 32, and I have given it a width equal to twice the width of '!'. | |
| 66 * </pre> | |
| 67 */ | |
| 68 | |
| 69 #ifdef HAVE_CONFIG_H | |
| 70 #include <config_auto.h> | |
| 71 #endif /* HAVE_CONFIG_H */ | |
| 72 | |
| 73 #include <string.h> | |
| 74 #include "allheaders.h" | |
| 75 #include "pix_internal.h" | |
| 76 #include "bmfdata.h" | |
| 77 | |
| 78 static const l_float32 VertFractSep = 0.3f; | |
| 79 | |
| 80 #ifndef NO_CONSOLE_IO | |
| 81 #define DEBUG_BASELINE 0 | |
| 82 #define DEBUG_CHARS 0 | |
| 83 #define DEBUG_FONT_GEN 0 | |
| 84 #endif /* ~NO_CONSOLE_IO */ | |
| 85 | |
| 86 static PIXA *pixaGenerateFontFromFile(const char *dir, l_int32 fontsize, | |
| 87 l_int32 *pbl0, l_int32 *pbl1, | |
| 88 l_int32 *pbl2); | |
| 89 static PIXA *pixaGenerateFontFromString(l_int32 fontsize, l_int32 *pbl0, | |
| 90 l_int32 *pbl1, l_int32 *pbl2); | |
| 91 static PIXA *pixaGenerateFont(PIX *pixs, l_int32 fontsize, l_int32 *pbl0, | |
| 92 l_int32 *pbl1, l_int32 *pbl2); | |
| 93 static l_int32 pixGetTextBaseline(PIX *pixs, l_int32 *tab8, l_int32 *py); | |
| 94 static l_int32 bmfMakeAsciiTables(L_BMF *bmf); | |
| 95 | |
| 96 /*---------------------------------------------------------------------*/ | |
| 97 /* Bmf create/destroy */ | |
| 98 /*---------------------------------------------------------------------*/ | |
| 99 /*! | |
| 100 * \brief bmfCreate() | |
| 101 * | |
| 102 * \param[in] dir [optional] directory holding pixa of character set | |
| 103 * \param[in] fontsize 4, 6, 8, ... , 20 | |
| 104 * \return bmf holding the bitmap font and associated information | |
| 105 * | |
| 106 * <pre> | |
| 107 * Notes: | |
| 108 * (1) If %dir == null, this generates the font bitmaps from a | |
| 109 * compiled string. | |
| 110 * (2) Otherwise, this tries to read a pre-computed pixa file with the | |
| 111 * 95 ascii chars in it. If the file is not found, it then | |
| 112 * attempts to generate the pixa and associated baseline | |
| 113 * data from a tiff image containing all the characters. If | |
| 114 * that fails, it uses the compiled string. | |
| 115 * </pre> | |
| 116 */ | |
| 117 L_BMF * | |
| 118 bmfCreate(const char *dir, | |
| 119 l_int32 fontsize) | |
| 120 { | |
| 121 L_BMF *bmf; | |
| 122 PIXA *pixa; | |
| 123 | |
| 124 if (fontsize < 4 || fontsize > 20 || (fontsize % 2)) | |
| 125 return (L_BMF *)ERROR_PTR("fontsize must be in {4, 6, ..., 20}", | |
| 126 __func__, NULL); | |
| 127 | |
| 128 bmf = (L_BMF *)LEPT_CALLOC(1, sizeof(L_BMF)); | |
| 129 | |
| 130 if (!dir) { /* Generate from a string */ | |
| 131 pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1, | |
| 132 &bmf->baseline2, &bmf->baseline3); | |
| 133 } else { /* Look for the pixa in a directory */ | |
| 134 pixa = pixaGetFont(dir, fontsize, &bmf->baseline1, &bmf->baseline2, | |
| 135 &bmf->baseline3); | |
| 136 if (!pixa) { /* Not found; make it from a file */ | |
| 137 L_INFO("Generating pixa of bitmap fonts from file\n", __func__); | |
| 138 pixa = pixaGenerateFontFromFile(dir, fontsize, &bmf->baseline1, | |
| 139 &bmf->baseline2, &bmf->baseline3); | |
| 140 if (!pixa) { /* Not made; make it from a string after all */ | |
| 141 L_ERROR("Failed to make font; use string\n", __func__); | |
| 142 pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1, | |
| 143 &bmf->baseline2, &bmf->baseline3); | |
| 144 } | |
| 145 } | |
| 146 } | |
| 147 | |
| 148 if (!pixa) { | |
| 149 bmfDestroy(&bmf); | |
| 150 return (L_BMF *)ERROR_PTR("font pixa not made", __func__, NULL); | |
| 151 } | |
| 152 | |
| 153 bmf->pixa = pixa; | |
| 154 bmf->size = fontsize; | |
| 155 if (dir) bmf->directory = stringNew(dir); | |
| 156 bmfMakeAsciiTables(bmf); | |
| 157 return bmf; | |
| 158 } | |
| 159 | |
| 160 | |
| 161 /*! | |
| 162 * \brief bmfDestroy() | |
| 163 * | |
| 164 * \param[in,out] pbmf will be set to null before returning | |
| 165 * \return void | |
| 166 */ | |
| 167 void | |
| 168 bmfDestroy(L_BMF **pbmf) | |
| 169 { | |
| 170 L_BMF *bmf; | |
| 171 | |
| 172 if (pbmf == NULL) { | |
| 173 L_WARNING("ptr address is null!\n", __func__); | |
| 174 return; | |
| 175 } | |
| 176 | |
| 177 if ((bmf = *pbmf) == NULL) | |
| 178 return; | |
| 179 | |
| 180 pixaDestroy(&bmf->pixa); | |
| 181 LEPT_FREE(bmf->directory); | |
| 182 LEPT_FREE(bmf->fonttab); | |
| 183 LEPT_FREE(bmf->baselinetab); | |
| 184 LEPT_FREE(bmf); | |
| 185 *pbmf = NULL; | |
| 186 } | |
| 187 | |
| 188 | |
| 189 /*---------------------------------------------------------------------*/ | |
| 190 /* Bmf accessors */ | |
| 191 /*---------------------------------------------------------------------*/ | |
| 192 /*! | |
| 193 * \brief bmfGetPix() | |
| 194 * | |
| 195 * \param[in] bmf | |
| 196 * \param[in] chr should be one of the 95 supported printable bitmaps | |
| 197 * \return pix clone of pix in bmf, or NULL on error | |
| 198 */ | |
| 199 PIX * | |
| 200 bmfGetPix(L_BMF *bmf, | |
| 201 char chr) | |
| 202 { | |
| 203 l_int32 i, index; | |
| 204 PIXA *pixa; | |
| 205 | |
| 206 if ((index = (l_int32)chr) == 10) /* NL */ | |
| 207 return NULL; | |
| 208 if (!bmf) | |
| 209 return (PIX *)ERROR_PTR("bmf not defined", __func__, NULL); | |
| 210 if (index < 32 || index >= 127) | |
| 211 return (PIX *)ERROR_PTR("invalid index", __func__, NULL); | |
| 212 | |
| 213 i = bmf->fonttab[index]; | |
| 214 if (i == UNDEF) { | |
| 215 L_ERROR("no bitmap representation for %d\n", __func__, index); | |
| 216 return NULL; | |
| 217 } | |
| 218 | |
| 219 if ((pixa = bmf->pixa) == NULL) | |
| 220 return (PIX *)ERROR_PTR("pixa not found", __func__, NULL); | |
| 221 | |
| 222 return pixaGetPix(pixa, i, L_CLONE); | |
| 223 } | |
| 224 | |
| 225 | |
| 226 /*! | |
| 227 * \brief bmfGetWidth() | |
| 228 * | |
| 229 * \param[in] bmf | |
| 230 * \param[in] chr should be one of the 95 supported bitmaps | |
| 231 * \param[out] pw character width; -1 if not printable | |
| 232 * \return 0 if OK, 1 on error | |
| 233 */ | |
| 234 l_ok | |
| 235 bmfGetWidth(L_BMF *bmf, | |
| 236 char chr, | |
| 237 l_int32 *pw) | |
| 238 { | |
| 239 l_int32 i, index; | |
| 240 PIXA *pixa; | |
| 241 | |
| 242 if (!pw) | |
| 243 return ERROR_INT("&w not defined", __func__, 1); | |
| 244 *pw = -1; | |
| 245 if (!bmf) | |
| 246 return ERROR_INT("bmf not defined", __func__, 1); | |
| 247 if ((index = (l_int32)chr) == 10) /* NL */ | |
| 248 return 0; | |
| 249 if (index < 32 || index >= 127) | |
| 250 return ERROR_INT("invalid index", __func__, 1); | |
| 251 | |
| 252 i = bmf->fonttab[index]; | |
| 253 if (i == UNDEF) { | |
| 254 L_ERROR("no bitmap representation for %d\n", __func__, index); | |
| 255 return 1; | |
| 256 } | |
| 257 | |
| 258 if ((pixa = bmf->pixa) == NULL) | |
| 259 return ERROR_INT("pixa not found", __func__, 1); | |
| 260 | |
| 261 return pixaGetPixDimensions(pixa, i, pw, NULL, NULL); | |
| 262 } | |
| 263 | |
| 264 | |
| 265 /*! | |
| 266 * \brief bmfGetBaseline() | |
| 267 * | |
| 268 * \param[in] bmf | |
| 269 * \param[in] chr should be one of the 95 supported bitmaps | |
| 270 * \param[out] pbaseline distance below UL corner of bitmap char | |
| 271 * \return 0 if OK, 1 on error | |
| 272 */ | |
| 273 l_ok | |
| 274 bmfGetBaseline(L_BMF *bmf, | |
| 275 char chr, | |
| 276 l_int32 *pbaseline) | |
| 277 { | |
| 278 l_int32 bl, index; | |
| 279 | |
| 280 if (!pbaseline) | |
| 281 return ERROR_INT("&baseline not defined", __func__, 1); | |
| 282 *pbaseline = 0; | |
| 283 if (!bmf) | |
| 284 return ERROR_INT("bmf not defined", __func__, 1); | |
| 285 if ((index = (l_int32)chr) == 10) /* NL */ | |
| 286 return 0; | |
| 287 if (index < 32 || index >= 127) | |
| 288 return ERROR_INT("invalid index", __func__, 1); | |
| 289 | |
| 290 bl = bmf->baselinetab[index]; | |
| 291 if (bl == UNDEF) { | |
| 292 L_ERROR("no bitmap representation for %d\n", __func__, index); | |
| 293 return 1; | |
| 294 } | |
| 295 | |
| 296 *pbaseline = bl; | |
| 297 return 0; | |
| 298 } | |
| 299 | |
| 300 | |
| 301 /*---------------------------------------------------------------------*/ | |
| 302 /* Font bitmap acquisition and generation */ | |
| 303 /*---------------------------------------------------------------------*/ | |
| 304 /*! | |
| 305 * \brief pixaGetFont() | |
| 306 * | |
| 307 * \param[in] dir directory holding pixa of character set | |
| 308 * \param[in] fontsize 4, 6, 8, ... , 20 | |
| 309 * \param[out] pbl0 baseline of row 1 | |
| 310 * \param[out] pbl1 baseline of row 2 | |
| 311 * \param[out] pbl2 baseline of row 3 | |
| 312 * \return pixa of font bitmaps for 95 characters, or NULL on error | |
| 313 * | |
| 314 * <pre> | |
| 315 * Notes: | |
| 316 * (1) This reads a pre-computed pixa file with the 95 ascii chars. | |
| 317 * </pre> | |
| 318 */ | |
| 319 PIXA * | |
| 320 pixaGetFont(const char *dir, | |
| 321 l_int32 fontsize, | |
| 322 l_int32 *pbl0, | |
| 323 l_int32 *pbl1, | |
| 324 l_int32 *pbl2) | |
| 325 { | |
| 326 char *pathname; | |
| 327 l_int32 fileno; | |
| 328 PIXA *pixa; | |
| 329 | |
| 330 fileno = (fontsize / 2) - 2; | |
| 331 if (fileno < 0 || fileno >= NUM_FONTS) | |
| 332 return (PIXA *)ERROR_PTR("font size invalid", __func__, NULL); | |
| 333 if (!pbl0 || !pbl1 || !pbl2) | |
| 334 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL); | |
| 335 *pbl0 = baselines[fileno][0]; | |
| 336 *pbl1 = baselines[fileno][1]; | |
| 337 *pbl2 = baselines[fileno][2]; | |
| 338 | |
| 339 pathname = pathJoin(dir, outputfonts[fileno]); | |
| 340 pixa = pixaRead(pathname); | |
| 341 LEPT_FREE(pathname); | |
| 342 | |
| 343 if (!pixa) | |
| 344 L_WARNING("pixa of char bitmaps not found\n", __func__); | |
| 345 return pixa; | |
| 346 } | |
| 347 | |
| 348 | |
| 349 /*! | |
| 350 * \brief pixaSaveFont() | |
| 351 * | |
| 352 * \param[in] indir [optional] directory holding image of character set | |
| 353 * \param[in] outdir directory into which the output pixa file | |
| 354 * will be written | |
| 355 * \param[in] fontsize in pts, at 300 ppi | |
| 356 * \return 0 if OK, 1 on error | |
| 357 * | |
| 358 * <pre> | |
| 359 * Notes: | |
| 360 * (1) This saves a font of a particular size. | |
| 361 * (2) If %indir == null, this generates the font bitmaps from a | |
| 362 * compiled string. | |
| 363 * (3) prog/genfonts calls this function for each of the | |
| 364 * nine font sizes, to generate all the font pixa files. | |
| 365 * </pre> | |
| 366 */ | |
| 367 l_ok | |
| 368 pixaSaveFont(const char *indir, | |
| 369 const char *outdir, | |
| 370 l_int32 fontsize) | |
| 371 { | |
| 372 char *pathname; | |
| 373 l_int32 bl1, bl2, bl3; | |
| 374 PIXA *pixa; | |
| 375 | |
| 376 if (fontsize < 4 || fontsize > 20 || (fontsize % 2)) | |
| 377 return ERROR_INT("fontsize must be in {4, 6, ..., 20}", __func__, 1); | |
| 378 | |
| 379 if (!indir) /* Generate from a string */ | |
| 380 pixa = pixaGenerateFontFromString(fontsize, &bl1, &bl2, &bl3); | |
| 381 else /* Generate from an image file */ | |
| 382 pixa = pixaGenerateFontFromFile(indir, fontsize, &bl1, &bl2, &bl3); | |
| 383 if (!pixa) | |
| 384 return ERROR_INT("pixa not made", __func__, 1); | |
| 385 | |
| 386 pathname = pathJoin(outdir, outputfonts[(fontsize - 4) / 2]); | |
| 387 pixaWrite(pathname, pixa); | |
| 388 | |
| 389 #if DEBUG_FONT_GEN | |
| 390 L_INFO("Found %d chars in font size %d\n", __func__, pixaGetCount(pixa), | |
| 391 fontsize); | |
| 392 L_INFO("Baselines are at: %d, %d, %d\n", __func__, bl1, bl2, bl3); | |
| 393 #endif /* DEBUG_FONT_GEN */ | |
| 394 | |
| 395 LEPT_FREE(pathname); | |
| 396 pixaDestroy(&pixa); | |
| 397 return 0; | |
| 398 } | |
| 399 | |
| 400 | |
| 401 /*! | |
| 402 * \brief pixaGenerateFontFromFile() | |
| 403 * | |
| 404 * \param[in] dir directory holding image of character set | |
| 405 * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi | |
| 406 * \param[out] pbl0 baseline of row 1 | |
| 407 * \param[out] pbl1 baseline of row 2 | |
| 408 * \param[out] pbl2 baseline of row 3 | |
| 409 * \return pixa of font bitmaps for 95 characters, or NULL on error | |
| 410 * | |
| 411 * These font generation functions use 9 sets, each with bitmaps | |
| 412 * of 94 ascii characters, all in Palatino-Roman font. | |
| 413 * Each input bitmap has 3 rows of characters. The range of | |
| 414 * ascii values in each row is as follows: | |
| 415 * row 0: 32-57 32 is a space | |
| 416 * row 1: 58-91 92, '\', is not represented in this font | |
| 417 * row 2: 93-126 | |
| 418 * We LR flip the '/' char to generate a bitmap for the missing | |
| 419 * '\' character, so that we have representations of all 95 | |
| 420 * printable chars. | |
| 421 * | |
| 422 * Typically, use pixaGetFont() to generate the character bitmaps | |
| 423 * in memory for a bmf. This will simply access the bitmap files | |
| 424 * in a serialized pixa that were produced in prog/genfonts.c using | |
| 425 * this function. | |
| 426 */ | |
| 427 static PIXA * | |
| 428 pixaGenerateFontFromFile(const char *dir, | |
| 429 l_int32 fontsize, | |
| 430 l_int32 *pbl0, | |
| 431 l_int32 *pbl1, | |
| 432 l_int32 *pbl2) | |
| 433 { | |
| 434 char *pathname; | |
| 435 l_int32 fileno; | |
| 436 PIX *pix; | |
| 437 PIXA *pixa; | |
| 438 | |
| 439 if (!pbl0 || !pbl1 || !pbl2) | |
| 440 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL); | |
| 441 *pbl0 = *pbl1 = *pbl2 = 0; | |
| 442 if (!dir) | |
| 443 return (PIXA *)ERROR_PTR("dir not defined", __func__, NULL); | |
| 444 fileno = (fontsize / 2) - 2; | |
| 445 if (fileno < 0 || fileno >= NUM_FONTS) | |
| 446 return (PIXA *)ERROR_PTR("font size invalid", __func__, NULL); | |
| 447 | |
| 448 pathname = pathJoin(dir, inputfonts[fileno]); | |
| 449 pix = pixRead(pathname); | |
| 450 LEPT_FREE(pathname); | |
| 451 if (!pix) { | |
| 452 L_ERROR("pix not found for font size %d\n", __func__, fontsize); | |
| 453 return NULL; | |
| 454 } | |
| 455 | |
| 456 pixa = pixaGenerateFont(pix, fontsize, pbl0, pbl1, pbl2); | |
| 457 pixDestroy(&pix); | |
| 458 return pixa; | |
| 459 } | |
| 460 | |
| 461 | |
| 462 /*! | |
| 463 * \brief pixaGenerateFontFromString() | |
| 464 * | |
| 465 * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi | |
| 466 * \param[out] pbl0 baseline of row 1 | |
| 467 * \param[out] pbl1 baseline of row 2 | |
| 468 * \param[out] pbl2 baseline of row 3 | |
| 469 * \return pixa of font bitmaps for 95 characters, or NULL on error | |
| 470 * | |
| 471 * <pre> | |
| 472 * Notes: | |
| 473 * (1) See pixaGenerateFontFromFile() for details. | |
| 474 * </pre> | |
| 475 */ | |
| 476 static PIXA * | |
| 477 pixaGenerateFontFromString(l_int32 fontsize, | |
| 478 l_int32 *pbl0, | |
| 479 l_int32 *pbl1, | |
| 480 l_int32 *pbl2) | |
| 481 { | |
| 482 l_uint8 *data; | |
| 483 l_int32 redsize, nbytes; | |
| 484 PIX *pix; | |
| 485 PIXA *pixa; | |
| 486 | |
| 487 if (!pbl0 || !pbl1 || !pbl2) | |
| 488 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL); | |
| 489 *pbl0 = *pbl1 = *pbl2 = 0; | |
| 490 redsize = (fontsize / 2) - 2; | |
| 491 if (redsize < 0 || redsize >= NUM_FONTS) | |
| 492 return (PIXA *)ERROR_PTR("invalid font size", __func__, NULL); | |
| 493 | |
| 494 if (fontsize == 4) { | |
| 495 data = decodeBase64(fontdata_4, strlen(fontdata_4), &nbytes); | |
| 496 } else if (fontsize == 6) { | |
| 497 data = decodeBase64(fontdata_6, strlen(fontdata_6), &nbytes); | |
| 498 } else if (fontsize == 8) { | |
| 499 data = decodeBase64(fontdata_8, strlen(fontdata_8), &nbytes); | |
| 500 } else if (fontsize == 10) { | |
| 501 data = decodeBase64(fontdata_10, strlen(fontdata_10), &nbytes); | |
| 502 } else if (fontsize == 12) { | |
| 503 data = decodeBase64(fontdata_12, strlen(fontdata_12), &nbytes); | |
| 504 } else if (fontsize == 14) { | |
| 505 data = decodeBase64(fontdata_14, strlen(fontdata_14), &nbytes); | |
| 506 } else if (fontsize == 16) { | |
| 507 data = decodeBase64(fontdata_16, strlen(fontdata_16), &nbytes); | |
| 508 } else if (fontsize == 18) { | |
| 509 data = decodeBase64(fontdata_18, strlen(fontdata_18), &nbytes); | |
| 510 } else { /* fontsize == 20 */ | |
| 511 data = decodeBase64(fontdata_20, strlen(fontdata_20), &nbytes); | |
| 512 } | |
| 513 if (!data) | |
| 514 return (PIXA *)ERROR_PTR("data not made", __func__, NULL); | |
| 515 | |
| 516 pix = pixReadMem(data, nbytes); | |
| 517 LEPT_FREE(data); | |
| 518 if (!pix) | |
| 519 return (PIXA *)ERROR_PTR("pix not made", __func__, NULL); | |
| 520 | |
| 521 pixa = pixaGenerateFont(pix, fontsize, pbl0, pbl1, pbl2); | |
| 522 pixDestroy(&pix); | |
| 523 return pixa; | |
| 524 } | |
| 525 | |
| 526 | |
| 527 /*! | |
| 528 * \brief pixaGenerateFont() | |
| 529 * | |
| 530 * \param[in] pixs of 95 characters in 3 rows | |
| 531 * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi | |
| 532 * \param[out] pbl0 baseline of row 1 | |
| 533 * \param[out] pbl1 baseline of row 2 | |
| 534 * \param[out] pbl2 baseline of row 3 | |
| 535 * \return pixa of font bitmaps for 95 characters, or NULL on error | |
| 536 * | |
| 537 * <pre> | |
| 538 * Notes: | |
| 539 * (1) This does all the work. See pixaGenerateFontFromFile() | |
| 540 * for an overview. | |
| 541 * (2) The pix is for one of the 9 fonts. %fontsize is only | |
| 542 * used here for debugging. | |
| 543 * </pre> | |
| 544 */ | |
| 545 static PIXA * | |
| 546 pixaGenerateFont(PIX *pixs, | |
| 547 l_int32 fontsize, | |
| 548 l_int32 *pbl0, | |
| 549 l_int32 *pbl1, | |
| 550 l_int32 *pbl2) | |
| 551 { | |
| 552 l_int32 i, j, nrows, nrowchars, nchars, h, yval; | |
| 553 l_int32 width, height; | |
| 554 l_int32 baseline[3]; | |
| 555 l_int32 *tab = NULL; | |
| 556 BOX *box, *box1, *box2; | |
| 557 BOXA *boxar, *boxac, *boxacs; | |
| 558 PIX *pix1, *pix2, *pixr, *pixrc, *pixc; | |
| 559 PIXA *pixa; | |
| 560 l_int32 n, w, inrow, top; | |
| 561 l_int32 *ia; | |
| 562 NUMA *na; | |
| 563 | |
| 564 if (!pbl0 || !pbl1 || !pbl2) | |
| 565 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL); | |
| 566 *pbl0 = *pbl1 = *pbl2 = 0; | |
| 567 if (!pixs) | |
| 568 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 569 | |
| 570 /* Locate the 3 rows of characters */ | |
| 571 w = pixGetWidth(pixs); | |
| 572 na = pixCountPixelsByRow(pixs, NULL); | |
| 573 boxar = boxaCreate(0); | |
| 574 n = numaGetCount(na); | |
| 575 ia = numaGetIArray(na); | |
| 576 inrow = 0; | |
| 577 for (i = 0; i < n; i++) { | |
| 578 if (!inrow && ia[i] > 0) { | |
| 579 inrow = 1; | |
| 580 top = i; | |
| 581 } else if (inrow && ia[i] == 0) { | |
| 582 inrow = 0; | |
| 583 box = boxCreate(0, top, w, i - top); | |
| 584 boxaAddBox(boxar, box, L_INSERT); | |
| 585 } | |
| 586 } | |
| 587 LEPT_FREE(ia); | |
| 588 numaDestroy(&na); | |
| 589 nrows = boxaGetCount(boxar); | |
| 590 #if DEBUG_FONT_GEN | |
| 591 L_INFO("For fontsize %s, have %d rows\n", __func__, fontsize, nrows); | |
| 592 #endif /* DEBUG_FONT_GEN */ | |
| 593 if (nrows != 3) { | |
| 594 L_INFO("nrows = %d; skipping fontsize %d\n", __func__, nrows, fontsize); | |
| 595 boxaDestroy(&boxar); | |
| 596 return (PIXA *)ERROR_PTR("3 rows not generated", __func__, NULL); | |
| 597 } | |
| 598 | |
| 599 /* Grab the character images and baseline data */ | |
| 600 #if DEBUG_BASELINE | |
| 601 lept_rmdir("baseline"); | |
| 602 lept_mkdir("baseline"); | |
| 603 #endif /* DEBUG_BASELINE */ | |
| 604 tab = makePixelSumTab8(); | |
| 605 pixa = pixaCreate(95); | |
| 606 for (i = 0; i < nrows; i++) { | |
| 607 box = boxaGetBox(boxar, i, L_CLONE); | |
| 608 pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */ | |
| 609 pixGetTextBaseline(pixr, tab, &yval); | |
| 610 baseline[i] = yval; | |
| 611 | |
| 612 #if DEBUG_BASELINE | |
| 613 L_INFO("Baseline info: row %d, yval = %d, h = %d\n", __func__, | |
| 614 i, yval, pixGetHeight(pixr)); | |
| 615 pix1 = pixCopy(NULL, pixr); | |
| 616 pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1, | |
| 617 L_FLIP_PIXELS); | |
| 618 if (i == 0 ) | |
| 619 pixWriteDebug("/tmp/baseline/row0.png", pix1, IFF_PNG); | |
| 620 else if (i == 1) | |
| 621 pixWriteDebug("/tmp/baseline/row1.png", pix1, IFF_PNG); | |
| 622 else | |
| 623 pixWriteDebug("/tmp/baseline/row2.png", pix1, IFF_PNG); | |
| 624 pixDestroy(&pix1); | |
| 625 #endif /* DEBUG_BASELINE */ | |
| 626 | |
| 627 boxDestroy(&box); | |
| 628 pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35); | |
| 629 boxac = pixConnComp(pixrc, NULL, 8); | |
| 630 boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL); | |
| 631 if (i == 0) { /* consolidate the two components of '"' */ | |
| 632 box1 = boxaGetBox(boxacs, 1, L_CLONE); | |
| 633 box2 = boxaGetBox(boxacs, 2, L_CLONE); | |
| 634 box1->w = box2->x + box2->w - box1->x; /* increase width */ | |
| 635 boxDestroy(&box1); | |
| 636 boxDestroy(&box2); | |
| 637 boxaRemoveBox(boxacs, 2); | |
| 638 } | |
| 639 h = pixGetHeight(pixr); | |
| 640 nrowchars = boxaGetCount(boxacs); | |
| 641 for (j = 0; j < nrowchars; j++) { | |
| 642 box = boxaGetBox(boxacs, j, L_COPY); | |
| 643 if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */ | |
| 644 boxDestroy(&box); | |
| 645 continue; | |
| 646 } | |
| 647 box->y = 0; | |
| 648 box->h = h - 1; | |
| 649 pixc = pixClipRectangle(pixr, box, NULL); | |
| 650 boxDestroy(&box); | |
| 651 if (i == 0 && j == 0) /* add a pix for the space; change later */ | |
| 652 pixaAddPix(pixa, pixc, L_COPY); | |
| 653 if (i == 2 && j == 0) /* add a pix for the '\'; change later */ | |
| 654 pixaAddPix(pixa, pixc, L_COPY); | |
| 655 pixaAddPix(pixa, pixc, L_INSERT); | |
| 656 } | |
| 657 pixDestroy(&pixr); | |
| 658 pixDestroy(&pixrc); | |
| 659 boxaDestroy(&boxac); | |
| 660 boxaDestroy(&boxacs); | |
| 661 } | |
| 662 LEPT_FREE(tab); | |
| 663 | |
| 664 nchars = pixaGetCount(pixa); | |
| 665 if (nchars != 95) | |
| 666 return (PIXA *)ERROR_PTR("95 chars not generated", __func__, NULL); | |
| 667 | |
| 668 *pbl0 = baseline[0]; | |
| 669 *pbl1 = baseline[1]; | |
| 670 *pbl2 = baseline[2]; | |
| 671 | |
| 672 /* Fix the space character up; it should have no ON pixels, | |
| 673 * and be about twice as wide as the '!' character. */ | |
| 674 pix1 = pixaGetPix(pixa, 0, L_CLONE); | |
| 675 width = 2 * pixGetWidth(pix1); | |
| 676 height = pixGetHeight(pix1); | |
| 677 pixDestroy(&pix1); | |
| 678 pix1 = pixCreate(width, height, 1); | |
| 679 pixaReplacePix(pixa, 0, pix1, NULL); | |
| 680 | |
| 681 /* Fix up the '\' character; use a LR flip of the '/' char */ | |
| 682 pix1 = pixaGetPix(pixa, 15, L_CLONE); | |
| 683 pix2 = pixFlipLR(NULL, pix1); | |
| 684 pixDestroy(&pix1); | |
| 685 pixaReplacePix(pixa, 60, pix2, NULL); | |
| 686 | |
| 687 #if DEBUG_CHARS | |
| 688 pix1 = pixaDisplayTiled(pixa, 1500, 0, 10); | |
| 689 pixDisplay(pix1, 100 * i, 200); | |
| 690 pixDestroy(&pix1); | |
| 691 #endif /* DEBUG_CHARS */ | |
| 692 | |
| 693 boxaDestroy(&boxar); | |
| 694 return pixa; | |
| 695 } | |
| 696 | |
| 697 | |
| 698 /*! | |
| 699 * \brief pixGetTextBaseline() | |
| 700 * | |
| 701 * \param[in] pixs 1 bpp, one textline character set | |
| 702 * \param[in] tab8 [optional] pixel sum table | |
| 703 * \param[out] py baseline value | |
| 704 * \return 0 if OK, 1 on error | |
| 705 * | |
| 706 * <pre> | |
| 707 * Notes: | |
| 708 * (1) Method: find the largest difference in pixel sums from one | |
| 709 * raster line to the next one below it. The baseline is the | |
| 710 * upper raster line for the pair of raster lines that | |
| 711 * maximizes this function. | |
| 712 * </pre> | |
| 713 */ | |
| 714 static l_int32 | |
| 715 pixGetTextBaseline(PIX *pixs, | |
| 716 l_int32 *tab8, | |
| 717 l_int32 *py) | |
| 718 { | |
| 719 l_int32 i, h, val1, val2, diff, diffmax, ymax; | |
| 720 l_int32 *tab; | |
| 721 NUMA *na; | |
| 722 | |
| 723 if (!pixs) | |
| 724 return ERROR_INT("pixs not defined", __func__, 1); | |
| 725 if (!py) | |
| 726 return ERROR_INT("&y not defined", __func__, 1); | |
| 727 *py = 0; | |
| 728 if (!tab8) | |
| 729 tab = makePixelSumTab8(); | |
| 730 else | |
| 731 tab = tab8; | |
| 732 | |
| 733 na = pixCountPixelsByRow(pixs, tab); | |
| 734 h = numaGetCount(na); | |
| 735 diffmax = 0; | |
| 736 ymax = 0; | |
| 737 for (i = 1; i < h; i++) { | |
| 738 numaGetIValue(na, i - 1, &val1); | |
| 739 numaGetIValue(na, i, &val2); | |
| 740 diff = L_MAX(0, val1 - val2); | |
| 741 if (diff > diffmax) { | |
| 742 diffmax = diff; | |
| 743 ymax = i - 1; /* upper raster line */ | |
| 744 } | |
| 745 } | |
| 746 *py = ymax; | |
| 747 | |
| 748 if (!tab8) | |
| 749 LEPT_FREE(tab); | |
| 750 numaDestroy(&na); | |
| 751 return 0; | |
| 752 } | |
| 753 | |
| 754 | |
| 755 /*! | |
| 756 * \brief bmfMakeAsciiTables | |
| 757 * | |
| 758 * \param[in] bmf | |
| 759 * \return 0 if OK, 1 on error | |
| 760 * | |
| 761 * <pre> | |
| 762 * Notes: | |
| 763 * (1) This makes two tables, each of size 128, as follows: | |
| 764 * ~ fonttab is a table containing the index of the Pix | |
| 765 * that corresponds to each input ascii character; | |
| 766 * it maps (ascii-index) --> Pixa index | |
| 767 * ~ baselinetab is a table containing the baseline offset | |
| 768 * for the Pix that corresponds to each input ascii character; | |
| 769 * it maps (ascii-index) --> baseline offset | |
| 770 * (2) This also computes | |
| 771 * ~ lineheight (sum of maximum character extensions above and | |
| 772 * below the baseline) | |
| 773 * ~ kernwidth (spacing between characters within a word) | |
| 774 * ~ spacewidth (space between words) | |
| 775 * ~ vertlinesep (extra vertical spacing between textlines) | |
| 776 * (3) The baselines apply as follows: | |
| 777 * baseline1 (ascii 32 - 57), ascii 92 | |
| 778 * baseline2 (ascii 58 - 91) | |
| 779 * baseline3 (ascii 93 - 126) | |
| 780 * (4) The only array in bmf that is not ascii-based is the | |
| 781 * array of bitmaps in the pixa, which starts at ascii 32. | |
| 782 * </pre> | |
| 783 */ | |
| 784 static l_int32 | |
| 785 bmfMakeAsciiTables(L_BMF *bmf) | |
| 786 { | |
| 787 l_int32 i, maxh, height, charwidth, xwidth, kernwidth; | |
| 788 l_int32 *fonttab, *baselinetab; | |
| 789 PIX *pix; | |
| 790 | |
| 791 if (!bmf) | |
| 792 return ERROR_INT("bmf not defined", __func__, 1); | |
| 793 | |
| 794 /* First get the fonttab; we use this later for the char widths */ | |
| 795 fonttab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); | |
| 796 bmf->fonttab = fonttab; | |
| 797 for (i = 0; i < 128; i++) | |
| 798 fonttab[i] = UNDEF; | |
| 799 for (i = 32; i < 127; i++) | |
| 800 fonttab[i] = i - 32; | |
| 801 | |
| 802 baselinetab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); | |
| 803 bmf->baselinetab = baselinetab; | |
| 804 for (i = 0; i < 128; i++) | |
| 805 baselinetab[i] = UNDEF; | |
| 806 for (i = 32; i <= 57; i++) | |
| 807 baselinetab[i] = bmf->baseline1; | |
| 808 for (i = 58; i <= 91; i++) | |
| 809 baselinetab[i] = bmf->baseline2; | |
| 810 baselinetab[92] = bmf->baseline1; /* the '\' char */ | |
| 811 for (i = 93; i < 127; i++) | |
| 812 baselinetab[i] = bmf->baseline3; | |
| 813 | |
| 814 /* Get the line height of text characters, from the highest | |
| 815 * ascender to the lowest descender; req's fonttab to exist. */ | |
| 816 pix = bmfGetPix(bmf, 32); | |
| 817 maxh = pixGetHeight(pix); | |
| 818 pixDestroy(&pix); | |
| 819 pix = bmfGetPix(bmf, 58); | |
| 820 height = pixGetHeight(pix); | |
| 821 pixDestroy(&pix); | |
| 822 maxh = L_MAX(maxh, height); | |
| 823 pix = bmfGetPix(bmf, 93); | |
| 824 height = pixGetHeight(pix); | |
| 825 pixDestroy(&pix); | |
| 826 maxh = L_MAX(maxh, height); | |
| 827 bmf->lineheight = maxh; | |
| 828 | |
| 829 /* Get the kern width (distance between characters). | |
| 830 * We let it be the same for all characters in a given | |
| 831 * font size, and scale it linearly with the size; | |
| 832 * req's fonttab to be built first. */ | |
| 833 bmfGetWidth(bmf, 120, &xwidth); | |
| 834 kernwidth = (l_int32)(0.08 * (l_float32)xwidth + 0.5); | |
| 835 bmf->kernwidth = L_MAX(1, kernwidth); | |
| 836 | |
| 837 /* Save the space width (between words) */ | |
| 838 bmfGetWidth(bmf, 32, &charwidth); | |
| 839 bmf->spacewidth = charwidth; | |
| 840 | |
| 841 /* Save the extra vertical space between lines */ | |
| 842 bmf->vertlinesep = (l_int32)(VertFractSep * bmf->lineheight + 0.5); | |
| 843 | |
| 844 return 0; | |
| 845 } |
