comparison mupdf-source/thirdparty/leptonica/src/bmf.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file bmf.c
29 * <pre>
30 *
31 * Acquisition and generation of bitmap fonts.
32 *
33 * L_BMF *bmfCreate()
34 * L_BMF *bmfDestroy()
35 *
36 * PIX *bmfGetPix()
37 * l_int32 bmfGetWidth()
38 * l_int32 bmfGetBaseline()
39 *
40 * PIXA *pixaGetFont()
41 * l_int32 pixaSaveFont()
42 * static PIXA *pixaGenerateFontFromFile()
43 * static PIXA *pixaGenerateFontFromString()
44 * static PIXA *pixaGenerateFont()
45 * static l_int32 pixGetTextBaseline()
46 * static l_int32 bmfMakeAsciiTables()
47 *
48 * This is not a very general utility, because it only uses bitmap
49 * representations of a single font, Palatino-Roman, with the
50 * normal style. It uses bitmaps generated for nine sizes, from
51 * 4 to 20 pts, rendered at 300 ppi. Generalization to different
52 * fonts, styles and sizes is straightforward.
53 *
54 * I chose Palatino-Roman because I like it.
55 * The input font images were generated from a set of small
56 * PostScript files, such as chars-12.ps, which were rendered
57 * into the inputfont[] bitmap files using GhostScript. See, for
58 * example, the bash script prog/ps2tiff, which will "rip" a
59 * PostScript file into a set of ccitt-g4 compressed tiff files.
60 *
61 * The set of ascii characters from 32 through 126 are the 95
62 * printable ascii chars. Palatino-Roman is missing char 92, '\'.
63 * I have substituted an LR flip of '/', char 47, for 92, so that
64 * there are no missing printable chars in this set. The space is
65 * char 32, and I have given it a width equal to twice the width of '!'.
66 * </pre>
67 */
68
69 #ifdef HAVE_CONFIG_H
70 #include <config_auto.h>
71 #endif /* HAVE_CONFIG_H */
72
73 #include <string.h>
74 #include "allheaders.h"
75 #include "pix_internal.h"
76 #include "bmfdata.h"
77
78 static const l_float32 VertFractSep = 0.3f;
79
80 #ifndef NO_CONSOLE_IO
81 #define DEBUG_BASELINE 0
82 #define DEBUG_CHARS 0
83 #define DEBUG_FONT_GEN 0
84 #endif /* ~NO_CONSOLE_IO */
85
86 static PIXA *pixaGenerateFontFromFile(const char *dir, l_int32 fontsize,
87 l_int32 *pbl0, l_int32 *pbl1,
88 l_int32 *pbl2);
89 static PIXA *pixaGenerateFontFromString(l_int32 fontsize, l_int32 *pbl0,
90 l_int32 *pbl1, l_int32 *pbl2);
91 static PIXA *pixaGenerateFont(PIX *pixs, l_int32 fontsize, l_int32 *pbl0,
92 l_int32 *pbl1, l_int32 *pbl2);
93 static l_int32 pixGetTextBaseline(PIX *pixs, l_int32 *tab8, l_int32 *py);
94 static l_int32 bmfMakeAsciiTables(L_BMF *bmf);
95
96 /*---------------------------------------------------------------------*/
97 /* Bmf create/destroy */
98 /*---------------------------------------------------------------------*/
99 /*!
100 * \brief bmfCreate()
101 *
102 * \param[in] dir [optional] directory holding pixa of character set
103 * \param[in] fontsize 4, 6, 8, ... , 20
104 * \return bmf holding the bitmap font and associated information
105 *
106 * <pre>
107 * Notes:
108 * (1) If %dir == null, this generates the font bitmaps from a
109 * compiled string.
110 * (2) Otherwise, this tries to read a pre-computed pixa file with the
111 * 95 ascii chars in it. If the file is not found, it then
112 * attempts to generate the pixa and associated baseline
113 * data from a tiff image containing all the characters. If
114 * that fails, it uses the compiled string.
115 * </pre>
116 */
117 L_BMF *
118 bmfCreate(const char *dir,
119 l_int32 fontsize)
120 {
121 L_BMF *bmf;
122 PIXA *pixa;
123
124 if (fontsize < 4 || fontsize > 20 || (fontsize % 2))
125 return (L_BMF *)ERROR_PTR("fontsize must be in {4, 6, ..., 20}",
126 __func__, NULL);
127
128 bmf = (L_BMF *)LEPT_CALLOC(1, sizeof(L_BMF));
129
130 if (!dir) { /* Generate from a string */
131 pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1,
132 &bmf->baseline2, &bmf->baseline3);
133 } else { /* Look for the pixa in a directory */
134 pixa = pixaGetFont(dir, fontsize, &bmf->baseline1, &bmf->baseline2,
135 &bmf->baseline3);
136 if (!pixa) { /* Not found; make it from a file */
137 L_INFO("Generating pixa of bitmap fonts from file\n", __func__);
138 pixa = pixaGenerateFontFromFile(dir, fontsize, &bmf->baseline1,
139 &bmf->baseline2, &bmf->baseline3);
140 if (!pixa) { /* Not made; make it from a string after all */
141 L_ERROR("Failed to make font; use string\n", __func__);
142 pixa = pixaGenerateFontFromString(fontsize, &bmf->baseline1,
143 &bmf->baseline2, &bmf->baseline3);
144 }
145 }
146 }
147
148 if (!pixa) {
149 bmfDestroy(&bmf);
150 return (L_BMF *)ERROR_PTR("font pixa not made", __func__, NULL);
151 }
152
153 bmf->pixa = pixa;
154 bmf->size = fontsize;
155 if (dir) bmf->directory = stringNew(dir);
156 bmfMakeAsciiTables(bmf);
157 return bmf;
158 }
159
160
161 /*!
162 * \brief bmfDestroy()
163 *
164 * \param[in,out] pbmf will be set to null before returning
165 * \return void
166 */
167 void
168 bmfDestroy(L_BMF **pbmf)
169 {
170 L_BMF *bmf;
171
172 if (pbmf == NULL) {
173 L_WARNING("ptr address is null!\n", __func__);
174 return;
175 }
176
177 if ((bmf = *pbmf) == NULL)
178 return;
179
180 pixaDestroy(&bmf->pixa);
181 LEPT_FREE(bmf->directory);
182 LEPT_FREE(bmf->fonttab);
183 LEPT_FREE(bmf->baselinetab);
184 LEPT_FREE(bmf);
185 *pbmf = NULL;
186 }
187
188
189 /*---------------------------------------------------------------------*/
190 /* Bmf accessors */
191 /*---------------------------------------------------------------------*/
192 /*!
193 * \brief bmfGetPix()
194 *
195 * \param[in] bmf
196 * \param[in] chr should be one of the 95 supported printable bitmaps
197 * \return pix clone of pix in bmf, or NULL on error
198 */
199 PIX *
200 bmfGetPix(L_BMF *bmf,
201 char chr)
202 {
203 l_int32 i, index;
204 PIXA *pixa;
205
206 if ((index = (l_int32)chr) == 10) /* NL */
207 return NULL;
208 if (!bmf)
209 return (PIX *)ERROR_PTR("bmf not defined", __func__, NULL);
210 if (index < 32 || index >= 127)
211 return (PIX *)ERROR_PTR("invalid index", __func__, NULL);
212
213 i = bmf->fonttab[index];
214 if (i == UNDEF) {
215 L_ERROR("no bitmap representation for %d\n", __func__, index);
216 return NULL;
217 }
218
219 if ((pixa = bmf->pixa) == NULL)
220 return (PIX *)ERROR_PTR("pixa not found", __func__, NULL);
221
222 return pixaGetPix(pixa, i, L_CLONE);
223 }
224
225
226 /*!
227 * \brief bmfGetWidth()
228 *
229 * \param[in] bmf
230 * \param[in] chr should be one of the 95 supported bitmaps
231 * \param[out] pw character width; -1 if not printable
232 * \return 0 if OK, 1 on error
233 */
234 l_ok
235 bmfGetWidth(L_BMF *bmf,
236 char chr,
237 l_int32 *pw)
238 {
239 l_int32 i, index;
240 PIXA *pixa;
241
242 if (!pw)
243 return ERROR_INT("&w not defined", __func__, 1);
244 *pw = -1;
245 if (!bmf)
246 return ERROR_INT("bmf not defined", __func__, 1);
247 if ((index = (l_int32)chr) == 10) /* NL */
248 return 0;
249 if (index < 32 || index >= 127)
250 return ERROR_INT("invalid index", __func__, 1);
251
252 i = bmf->fonttab[index];
253 if (i == UNDEF) {
254 L_ERROR("no bitmap representation for %d\n", __func__, index);
255 return 1;
256 }
257
258 if ((pixa = bmf->pixa) == NULL)
259 return ERROR_INT("pixa not found", __func__, 1);
260
261 return pixaGetPixDimensions(pixa, i, pw, NULL, NULL);
262 }
263
264
265 /*!
266 * \brief bmfGetBaseline()
267 *
268 * \param[in] bmf
269 * \param[in] chr should be one of the 95 supported bitmaps
270 * \param[out] pbaseline distance below UL corner of bitmap char
271 * \return 0 if OK, 1 on error
272 */
273 l_ok
274 bmfGetBaseline(L_BMF *bmf,
275 char chr,
276 l_int32 *pbaseline)
277 {
278 l_int32 bl, index;
279
280 if (!pbaseline)
281 return ERROR_INT("&baseline not defined", __func__, 1);
282 *pbaseline = 0;
283 if (!bmf)
284 return ERROR_INT("bmf not defined", __func__, 1);
285 if ((index = (l_int32)chr) == 10) /* NL */
286 return 0;
287 if (index < 32 || index >= 127)
288 return ERROR_INT("invalid index", __func__, 1);
289
290 bl = bmf->baselinetab[index];
291 if (bl == UNDEF) {
292 L_ERROR("no bitmap representation for %d\n", __func__, index);
293 return 1;
294 }
295
296 *pbaseline = bl;
297 return 0;
298 }
299
300
301 /*---------------------------------------------------------------------*/
302 /* Font bitmap acquisition and generation */
303 /*---------------------------------------------------------------------*/
304 /*!
305 * \brief pixaGetFont()
306 *
307 * \param[in] dir directory holding pixa of character set
308 * \param[in] fontsize 4, 6, 8, ... , 20
309 * \param[out] pbl0 baseline of row 1
310 * \param[out] pbl1 baseline of row 2
311 * \param[out] pbl2 baseline of row 3
312 * \return pixa of font bitmaps for 95 characters, or NULL on error
313 *
314 * <pre>
315 * Notes:
316 * (1) This reads a pre-computed pixa file with the 95 ascii chars.
317 * </pre>
318 */
319 PIXA *
320 pixaGetFont(const char *dir,
321 l_int32 fontsize,
322 l_int32 *pbl0,
323 l_int32 *pbl1,
324 l_int32 *pbl2)
325 {
326 char *pathname;
327 l_int32 fileno;
328 PIXA *pixa;
329
330 fileno = (fontsize / 2) - 2;
331 if (fileno < 0 || fileno >= NUM_FONTS)
332 return (PIXA *)ERROR_PTR("font size invalid", __func__, NULL);
333 if (!pbl0 || !pbl1 || !pbl2)
334 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL);
335 *pbl0 = baselines[fileno][0];
336 *pbl1 = baselines[fileno][1];
337 *pbl2 = baselines[fileno][2];
338
339 pathname = pathJoin(dir, outputfonts[fileno]);
340 pixa = pixaRead(pathname);
341 LEPT_FREE(pathname);
342
343 if (!pixa)
344 L_WARNING("pixa of char bitmaps not found\n", __func__);
345 return pixa;
346 }
347
348
349 /*!
350 * \brief pixaSaveFont()
351 *
352 * \param[in] indir [optional] directory holding image of character set
353 * \param[in] outdir directory into which the output pixa file
354 * will be written
355 * \param[in] fontsize in pts, at 300 ppi
356 * \return 0 if OK, 1 on error
357 *
358 * <pre>
359 * Notes:
360 * (1) This saves a font of a particular size.
361 * (2) If %indir == null, this generates the font bitmaps from a
362 * compiled string.
363 * (3) prog/genfonts calls this function for each of the
364 * nine font sizes, to generate all the font pixa files.
365 * </pre>
366 */
367 l_ok
368 pixaSaveFont(const char *indir,
369 const char *outdir,
370 l_int32 fontsize)
371 {
372 char *pathname;
373 l_int32 bl1, bl2, bl3;
374 PIXA *pixa;
375
376 if (fontsize < 4 || fontsize > 20 || (fontsize % 2))
377 return ERROR_INT("fontsize must be in {4, 6, ..., 20}", __func__, 1);
378
379 if (!indir) /* Generate from a string */
380 pixa = pixaGenerateFontFromString(fontsize, &bl1, &bl2, &bl3);
381 else /* Generate from an image file */
382 pixa = pixaGenerateFontFromFile(indir, fontsize, &bl1, &bl2, &bl3);
383 if (!pixa)
384 return ERROR_INT("pixa not made", __func__, 1);
385
386 pathname = pathJoin(outdir, outputfonts[(fontsize - 4) / 2]);
387 pixaWrite(pathname, pixa);
388
389 #if DEBUG_FONT_GEN
390 L_INFO("Found %d chars in font size %d\n", __func__, pixaGetCount(pixa),
391 fontsize);
392 L_INFO("Baselines are at: %d, %d, %d\n", __func__, bl1, bl2, bl3);
393 #endif /* DEBUG_FONT_GEN */
394
395 LEPT_FREE(pathname);
396 pixaDestroy(&pixa);
397 return 0;
398 }
399
400
401 /*!
402 * \brief pixaGenerateFontFromFile()
403 *
404 * \param[in] dir directory holding image of character set
405 * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi
406 * \param[out] pbl0 baseline of row 1
407 * \param[out] pbl1 baseline of row 2
408 * \param[out] pbl2 baseline of row 3
409 * \return pixa of font bitmaps for 95 characters, or NULL on error
410 *
411 * These font generation functions use 9 sets, each with bitmaps
412 * of 94 ascii characters, all in Palatino-Roman font.
413 * Each input bitmap has 3 rows of characters. The range of
414 * ascii values in each row is as follows:
415 * row 0: 32-57 32 is a space
416 * row 1: 58-91 92, '\', is not represented in this font
417 * row 2: 93-126
418 * We LR flip the '/' char to generate a bitmap for the missing
419 * '\' character, so that we have representations of all 95
420 * printable chars.
421 *
422 * Typically, use pixaGetFont() to generate the character bitmaps
423 * in memory for a bmf. This will simply access the bitmap files
424 * in a serialized pixa that were produced in prog/genfonts.c using
425 * this function.
426 */
427 static PIXA *
428 pixaGenerateFontFromFile(const char *dir,
429 l_int32 fontsize,
430 l_int32 *pbl0,
431 l_int32 *pbl1,
432 l_int32 *pbl2)
433 {
434 char *pathname;
435 l_int32 fileno;
436 PIX *pix;
437 PIXA *pixa;
438
439 if (!pbl0 || !pbl1 || !pbl2)
440 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL);
441 *pbl0 = *pbl1 = *pbl2 = 0;
442 if (!dir)
443 return (PIXA *)ERROR_PTR("dir not defined", __func__, NULL);
444 fileno = (fontsize / 2) - 2;
445 if (fileno < 0 || fileno >= NUM_FONTS)
446 return (PIXA *)ERROR_PTR("font size invalid", __func__, NULL);
447
448 pathname = pathJoin(dir, inputfonts[fileno]);
449 pix = pixRead(pathname);
450 LEPT_FREE(pathname);
451 if (!pix) {
452 L_ERROR("pix not found for font size %d\n", __func__, fontsize);
453 return NULL;
454 }
455
456 pixa = pixaGenerateFont(pix, fontsize, pbl0, pbl1, pbl2);
457 pixDestroy(&pix);
458 return pixa;
459 }
460
461
462 /*!
463 * \brief pixaGenerateFontFromString()
464 *
465 * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi
466 * \param[out] pbl0 baseline of row 1
467 * \param[out] pbl1 baseline of row 2
468 * \param[out] pbl2 baseline of row 3
469 * \return pixa of font bitmaps for 95 characters, or NULL on error
470 *
471 * <pre>
472 * Notes:
473 * (1) See pixaGenerateFontFromFile() for details.
474 * </pre>
475 */
476 static PIXA *
477 pixaGenerateFontFromString(l_int32 fontsize,
478 l_int32 *pbl0,
479 l_int32 *pbl1,
480 l_int32 *pbl2)
481 {
482 l_uint8 *data;
483 l_int32 redsize, nbytes;
484 PIX *pix;
485 PIXA *pixa;
486
487 if (!pbl0 || !pbl1 || !pbl2)
488 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL);
489 *pbl0 = *pbl1 = *pbl2 = 0;
490 redsize = (fontsize / 2) - 2;
491 if (redsize < 0 || redsize >= NUM_FONTS)
492 return (PIXA *)ERROR_PTR("invalid font size", __func__, NULL);
493
494 if (fontsize == 4) {
495 data = decodeBase64(fontdata_4, strlen(fontdata_4), &nbytes);
496 } else if (fontsize == 6) {
497 data = decodeBase64(fontdata_6, strlen(fontdata_6), &nbytes);
498 } else if (fontsize == 8) {
499 data = decodeBase64(fontdata_8, strlen(fontdata_8), &nbytes);
500 } else if (fontsize == 10) {
501 data = decodeBase64(fontdata_10, strlen(fontdata_10), &nbytes);
502 } else if (fontsize == 12) {
503 data = decodeBase64(fontdata_12, strlen(fontdata_12), &nbytes);
504 } else if (fontsize == 14) {
505 data = decodeBase64(fontdata_14, strlen(fontdata_14), &nbytes);
506 } else if (fontsize == 16) {
507 data = decodeBase64(fontdata_16, strlen(fontdata_16), &nbytes);
508 } else if (fontsize == 18) {
509 data = decodeBase64(fontdata_18, strlen(fontdata_18), &nbytes);
510 } else { /* fontsize == 20 */
511 data = decodeBase64(fontdata_20, strlen(fontdata_20), &nbytes);
512 }
513 if (!data)
514 return (PIXA *)ERROR_PTR("data not made", __func__, NULL);
515
516 pix = pixReadMem(data, nbytes);
517 LEPT_FREE(data);
518 if (!pix)
519 return (PIXA *)ERROR_PTR("pix not made", __func__, NULL);
520
521 pixa = pixaGenerateFont(pix, fontsize, pbl0, pbl1, pbl2);
522 pixDestroy(&pix);
523 return pixa;
524 }
525
526
527 /*!
528 * \brief pixaGenerateFont()
529 *
530 * \param[in] pixs of 95 characters in 3 rows
531 * \param[in] fontsize 4, 6, 8, ... , 20, in pts at 300 ppi
532 * \param[out] pbl0 baseline of row 1
533 * \param[out] pbl1 baseline of row 2
534 * \param[out] pbl2 baseline of row 3
535 * \return pixa of font bitmaps for 95 characters, or NULL on error
536 *
537 * <pre>
538 * Notes:
539 * (1) This does all the work. See pixaGenerateFontFromFile()
540 * for an overview.
541 * (2) The pix is for one of the 9 fonts. %fontsize is only
542 * used here for debugging.
543 * </pre>
544 */
545 static PIXA *
546 pixaGenerateFont(PIX *pixs,
547 l_int32 fontsize,
548 l_int32 *pbl0,
549 l_int32 *pbl1,
550 l_int32 *pbl2)
551 {
552 l_int32 i, j, nrows, nrowchars, nchars, h, yval;
553 l_int32 width, height;
554 l_int32 baseline[3];
555 l_int32 *tab = NULL;
556 BOX *box, *box1, *box2;
557 BOXA *boxar, *boxac, *boxacs;
558 PIX *pix1, *pix2, *pixr, *pixrc, *pixc;
559 PIXA *pixa;
560 l_int32 n, w, inrow, top;
561 l_int32 *ia;
562 NUMA *na;
563
564 if (!pbl0 || !pbl1 || !pbl2)
565 return (PIXA *)ERROR_PTR("&bl not all defined", __func__, NULL);
566 *pbl0 = *pbl1 = *pbl2 = 0;
567 if (!pixs)
568 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
569
570 /* Locate the 3 rows of characters */
571 w = pixGetWidth(pixs);
572 na = pixCountPixelsByRow(pixs, NULL);
573 boxar = boxaCreate(0);
574 n = numaGetCount(na);
575 ia = numaGetIArray(na);
576 inrow = 0;
577 for (i = 0; i < n; i++) {
578 if (!inrow && ia[i] > 0) {
579 inrow = 1;
580 top = i;
581 } else if (inrow && ia[i] == 0) {
582 inrow = 0;
583 box = boxCreate(0, top, w, i - top);
584 boxaAddBox(boxar, box, L_INSERT);
585 }
586 }
587 LEPT_FREE(ia);
588 numaDestroy(&na);
589 nrows = boxaGetCount(boxar);
590 #if DEBUG_FONT_GEN
591 L_INFO("For fontsize %s, have %d rows\n", __func__, fontsize, nrows);
592 #endif /* DEBUG_FONT_GEN */
593 if (nrows != 3) {
594 L_INFO("nrows = %d; skipping fontsize %d\n", __func__, nrows, fontsize);
595 boxaDestroy(&boxar);
596 return (PIXA *)ERROR_PTR("3 rows not generated", __func__, NULL);
597 }
598
599 /* Grab the character images and baseline data */
600 #if DEBUG_BASELINE
601 lept_rmdir("baseline");
602 lept_mkdir("baseline");
603 #endif /* DEBUG_BASELINE */
604 tab = makePixelSumTab8();
605 pixa = pixaCreate(95);
606 for (i = 0; i < nrows; i++) {
607 box = boxaGetBox(boxar, i, L_CLONE);
608 pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */
609 pixGetTextBaseline(pixr, tab, &yval);
610 baseline[i] = yval;
611
612 #if DEBUG_BASELINE
613 L_INFO("Baseline info: row %d, yval = %d, h = %d\n", __func__,
614 i, yval, pixGetHeight(pixr));
615 pix1 = pixCopy(NULL, pixr);
616 pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1,
617 L_FLIP_PIXELS);
618 if (i == 0 )
619 pixWriteDebug("/tmp/baseline/row0.png", pix1, IFF_PNG);
620 else if (i == 1)
621 pixWriteDebug("/tmp/baseline/row1.png", pix1, IFF_PNG);
622 else
623 pixWriteDebug("/tmp/baseline/row2.png", pix1, IFF_PNG);
624 pixDestroy(&pix1);
625 #endif /* DEBUG_BASELINE */
626
627 boxDestroy(&box);
628 pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35);
629 boxac = pixConnComp(pixrc, NULL, 8);
630 boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL);
631 if (i == 0) { /* consolidate the two components of '"' */
632 box1 = boxaGetBox(boxacs, 1, L_CLONE);
633 box2 = boxaGetBox(boxacs, 2, L_CLONE);
634 box1->w = box2->x + box2->w - box1->x; /* increase width */
635 boxDestroy(&box1);
636 boxDestroy(&box2);
637 boxaRemoveBox(boxacs, 2);
638 }
639 h = pixGetHeight(pixr);
640 nrowchars = boxaGetCount(boxacs);
641 for (j = 0; j < nrowchars; j++) {
642 box = boxaGetBox(boxacs, j, L_COPY);
643 if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */
644 boxDestroy(&box);
645 continue;
646 }
647 box->y = 0;
648 box->h = h - 1;
649 pixc = pixClipRectangle(pixr, box, NULL);
650 boxDestroy(&box);
651 if (i == 0 && j == 0) /* add a pix for the space; change later */
652 pixaAddPix(pixa, pixc, L_COPY);
653 if (i == 2 && j == 0) /* add a pix for the '\'; change later */
654 pixaAddPix(pixa, pixc, L_COPY);
655 pixaAddPix(pixa, pixc, L_INSERT);
656 }
657 pixDestroy(&pixr);
658 pixDestroy(&pixrc);
659 boxaDestroy(&boxac);
660 boxaDestroy(&boxacs);
661 }
662 LEPT_FREE(tab);
663
664 nchars = pixaGetCount(pixa);
665 if (nchars != 95)
666 return (PIXA *)ERROR_PTR("95 chars not generated", __func__, NULL);
667
668 *pbl0 = baseline[0];
669 *pbl1 = baseline[1];
670 *pbl2 = baseline[2];
671
672 /* Fix the space character up; it should have no ON pixels,
673 * and be about twice as wide as the '!' character. */
674 pix1 = pixaGetPix(pixa, 0, L_CLONE);
675 width = 2 * pixGetWidth(pix1);
676 height = pixGetHeight(pix1);
677 pixDestroy(&pix1);
678 pix1 = pixCreate(width, height, 1);
679 pixaReplacePix(pixa, 0, pix1, NULL);
680
681 /* Fix up the '\' character; use a LR flip of the '/' char */
682 pix1 = pixaGetPix(pixa, 15, L_CLONE);
683 pix2 = pixFlipLR(NULL, pix1);
684 pixDestroy(&pix1);
685 pixaReplacePix(pixa, 60, pix2, NULL);
686
687 #if DEBUG_CHARS
688 pix1 = pixaDisplayTiled(pixa, 1500, 0, 10);
689 pixDisplay(pix1, 100 * i, 200);
690 pixDestroy(&pix1);
691 #endif /* DEBUG_CHARS */
692
693 boxaDestroy(&boxar);
694 return pixa;
695 }
696
697
698 /*!
699 * \brief pixGetTextBaseline()
700 *
701 * \param[in] pixs 1 bpp, one textline character set
702 * \param[in] tab8 [optional] pixel sum table
703 * \param[out] py baseline value
704 * \return 0 if OK, 1 on error
705 *
706 * <pre>
707 * Notes:
708 * (1) Method: find the largest difference in pixel sums from one
709 * raster line to the next one below it. The baseline is the
710 * upper raster line for the pair of raster lines that
711 * maximizes this function.
712 * </pre>
713 */
714 static l_int32
715 pixGetTextBaseline(PIX *pixs,
716 l_int32 *tab8,
717 l_int32 *py)
718 {
719 l_int32 i, h, val1, val2, diff, diffmax, ymax;
720 l_int32 *tab;
721 NUMA *na;
722
723 if (!pixs)
724 return ERROR_INT("pixs not defined", __func__, 1);
725 if (!py)
726 return ERROR_INT("&y not defined", __func__, 1);
727 *py = 0;
728 if (!tab8)
729 tab = makePixelSumTab8();
730 else
731 tab = tab8;
732
733 na = pixCountPixelsByRow(pixs, tab);
734 h = numaGetCount(na);
735 diffmax = 0;
736 ymax = 0;
737 for (i = 1; i < h; i++) {
738 numaGetIValue(na, i - 1, &val1);
739 numaGetIValue(na, i, &val2);
740 diff = L_MAX(0, val1 - val2);
741 if (diff > diffmax) {
742 diffmax = diff;
743 ymax = i - 1; /* upper raster line */
744 }
745 }
746 *py = ymax;
747
748 if (!tab8)
749 LEPT_FREE(tab);
750 numaDestroy(&na);
751 return 0;
752 }
753
754
755 /*!
756 * \brief bmfMakeAsciiTables
757 *
758 * \param[in] bmf
759 * \return 0 if OK, 1 on error
760 *
761 * <pre>
762 * Notes:
763 * (1) This makes two tables, each of size 128, as follows:
764 * ~ fonttab is a table containing the index of the Pix
765 * that corresponds to each input ascii character;
766 * it maps (ascii-index) --> Pixa index
767 * ~ baselinetab is a table containing the baseline offset
768 * for the Pix that corresponds to each input ascii character;
769 * it maps (ascii-index) --> baseline offset
770 * (2) This also computes
771 * ~ lineheight (sum of maximum character extensions above and
772 * below the baseline)
773 * ~ kernwidth (spacing between characters within a word)
774 * ~ spacewidth (space between words)
775 * ~ vertlinesep (extra vertical spacing between textlines)
776 * (3) The baselines apply as follows:
777 * baseline1 (ascii 32 - 57), ascii 92
778 * baseline2 (ascii 58 - 91)
779 * baseline3 (ascii 93 - 126)
780 * (4) The only array in bmf that is not ascii-based is the
781 * array of bitmaps in the pixa, which starts at ascii 32.
782 * </pre>
783 */
784 static l_int32
785 bmfMakeAsciiTables(L_BMF *bmf)
786 {
787 l_int32 i, maxh, height, charwidth, xwidth, kernwidth;
788 l_int32 *fonttab, *baselinetab;
789 PIX *pix;
790
791 if (!bmf)
792 return ERROR_INT("bmf not defined", __func__, 1);
793
794 /* First get the fonttab; we use this later for the char widths */
795 fonttab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32));
796 bmf->fonttab = fonttab;
797 for (i = 0; i < 128; i++)
798 fonttab[i] = UNDEF;
799 for (i = 32; i < 127; i++)
800 fonttab[i] = i - 32;
801
802 baselinetab = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32));
803 bmf->baselinetab = baselinetab;
804 for (i = 0; i < 128; i++)
805 baselinetab[i] = UNDEF;
806 for (i = 32; i <= 57; i++)
807 baselinetab[i] = bmf->baseline1;
808 for (i = 58; i <= 91; i++)
809 baselinetab[i] = bmf->baseline2;
810 baselinetab[92] = bmf->baseline1; /* the '\' char */
811 for (i = 93; i < 127; i++)
812 baselinetab[i] = bmf->baseline3;
813
814 /* Get the line height of text characters, from the highest
815 * ascender to the lowest descender; req's fonttab to exist. */
816 pix = bmfGetPix(bmf, 32);
817 maxh = pixGetHeight(pix);
818 pixDestroy(&pix);
819 pix = bmfGetPix(bmf, 58);
820 height = pixGetHeight(pix);
821 pixDestroy(&pix);
822 maxh = L_MAX(maxh, height);
823 pix = bmfGetPix(bmf, 93);
824 height = pixGetHeight(pix);
825 pixDestroy(&pix);
826 maxh = L_MAX(maxh, height);
827 bmf->lineheight = maxh;
828
829 /* Get the kern width (distance between characters).
830 * We let it be the same for all characters in a given
831 * font size, and scale it linearly with the size;
832 * req's fonttab to be built first. */
833 bmfGetWidth(bmf, 120, &xwidth);
834 kernwidth = (l_int32)(0.08 * (l_float32)xwidth + 0.5);
835 bmf->kernwidth = L_MAX(1, kernwidth);
836
837 /* Save the space width (between words) */
838 bmfGetWidth(bmf, 32, &charwidth);
839 bmf->spacewidth = charwidth;
840
841 /* Save the extra vertical space between lines */
842 bmf->vertlinesep = (l_int32)(VertFractSep * bmf->lineheight + 0.5);
843
844 return 0;
845 }