comparison mupdf-source/source/fitz/output-pdfocr.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <assert.h>
26 #include <string.h>
27 #include <limits.h>
28
29 #ifdef OCR_DISABLED
30
/* Empty usage text for builds without OCR support.
In non-OCR builds, we need to define this otherwise SWIG Python gets SEGV
when it attempts to import mupdf.py and _mupdf.py. */
const char *fz_pdfocr_write_options_usage = "";
34
35 #else
36
37 #include "tessocr.h"
38
/* Help text listing the options recognised by fz_parse_pdfocr_options():
 * one tab-indented line per option, terminated by a blank line. */
const char *fz_pdfocr_write_options_usage =
	"PDFOCR output options:\n"
	"\tcompression=none: No compression (default)\n"
	"\tcompression=flate: Flate compression\n"
	"\tstrip-height=N: Strip height (default 0=fullpage)\n"
	"\tocr-language=<lang>: OCR language (default=eng)\n"
	"\tocr-datadir=<datadir>: OCR data path (default=rely on TESSDATA_PREFIX)\n"
	"\tskew=none,auto,<angle>: Whether to skew correct (default=none).\n"
	"\tskew-border=increase,maintain,decrease: Size change for border pixels (default=increase).\n"
	"\n";
49
/* PDF object 3: the Type0 font "GlyphLessFont" (Identity-H encoding). It
 * refers to object 4 as its descendant font and object 6 as its ToUnicode
 * map. Written verbatim at the start of the file by pdfocr_write_header(). */
static const char funky_font[] =
	"3 0 obj\n<</BaseFont/GlyphLessFont/DescendantFonts[4 0 R]"
	"/Encoding/Identity-H/Subtype/Type0/ToUnicode 6 0 R/Type/Font"
	">>\nendobj\n";
54
/* PDF object 4: the CIDFontType2 descendant font. It refers to object 5 as
 * its CIDToGIDMap and object 7 as its font descriptor, and uses a default
 * glyph width (DW) of 500. */
static const char funky_font2[] =
	"4 0 obj\n"
	"<</BaseFont/GlyphLessFont/CIDToGIDMap 5 0 R"
	"/CIDSystemInfo<</Ordering (Identity)/Registry (Adobe)/Supplement 0>>"
	"/FontDescriptor 7 0 R/Subtype/CIDFontType2/Type/Font/DW 500>>"
	"\nendobj\n";
61
/* PDF object 5: a 210 byte Flate-compressed stream, referenced by object 4
 * as its /CIDToGIDMap. The bytes contain embedded NULs, so the array must
 * always be written with an explicit length (sizeof - 1), never as a C
 * string. */
static const char funky_font3[] =
	"5 0 obj\n<</Length 210/Filter/FlateDecode>>\nstream\n"
	"\x78\x9c\xec\xc2\x01\x09\x00\x00\x00\x02\xa0\xfa\x7f\xba\x21\x89"
	"\xa6\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x80\x7b\x03\x00\x00\xff\xff\xec\xc2\x01\x0d\x00\x00\x00\xc2\x20"
	"\xdf\xbf\xb4\x45\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\xeb\x00\x00\x00\xff\xff\xec\xc2\x01\x0d\x00"
	"\x00\x00\xc2\x20\xdf\xbf\xb4\x45\x18\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xeb\x00\x00\x00\xff\xff\xed"
	"\xc2\x01\x0d\x00\x00\x00\xc2\x20\xdf\xbf\xb4\x45\x18\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xeb\x00\xff"
	"\x00\x10"
	"\nendstream\nendobj\n";
79
/* PDF object 6: the ToUnicode CMap referenced by object 3. It declares a
 * single 2-byte codespace <0000>..<FFFF> and one bfrange that maps every
 * code in that range starting from <0000>. */
static const char funky_font4[] =
	"6 0 obj\n<</Length 353>>\nstream\n"
	"/CIDInit /ProcSet findresource begin\n"
	"12 dict begin\n"
	"begincmap\n"
	"/CIDSystemInfo\n"
	"<<\n"
	" /Registry (Adobe)\n"
	" /Ordering (UCS)\n"
	" /Supplement 0\n"
	">> def\n"
	"/CMapName /Adobe-Identity-UCS def\n"
	"/CMapType 2 def\n"
	"1 begincodespacerange\n"
	"<0000> <FFFF>\n"
	"endcodespacerange\n"
	"1 beginbfrange\n"
	"<0000> <FFFF> <0000>\n"
	"endbfrange\n"
	"endcmap\n"
	"CMapName currentdict /CMap defineresource pop\n"
	"end\n"
	"end\n"
	"endstream\n"
	"endobj\n";
105
/* PDF object 7: the FontDescriptor for GlyphLessFont, referenced by object 4.
 * The embedded font program is object 8 (/FontFile2). */
static const char funky_font5[] =
	"7 0 obj\n"
	"<</Ascent 1000/CapHeight 1000/Descent -1/Flags 5"
	"/FontBBox[0 0 500 1000]/FontFile2 8 0 R/FontName/GlyphLessFont"
	"/ItalicAngle 0/StemV 80/Type/FontDescriptor>>\nendobj\n";
111
/* PDF object 8: the 572 byte uncompressed font program referenced by the
 * /FontFile2 entry of object 7. The data contains embedded NULs, so the
 * array must always be written with an explicit length (sizeof - 1). */
static const char funky_font6[] =
	"8 0 obj\n<</Length 572/Length1 572>>\nstream\n"
	"\x00\x01\x00\x00\x00\x0a\x00\x80\x00\x03\x00\x20\x4f\x53\x2f\x32"
	"\x56\xde\xc8\x94\x00\x00\x01\x28\x00\x00\x00\x60\x63\x6d\x61\x70"
	"\x00\x0a\x00\x34\x00\x00\x01\x90\x00\x00\x00\x1e\x67\x6c\x79\x66"
	"\x15\x22\x41\x24\x00\x00\x01\xb8\x00\x00\x00\x18\x68\x65\x61\x64"
	"\x0b\x78\xf1\x65\x00\x00\x00\xac\x00\x00\x00\x36\x68\x68\x65\x61"
	"\x0c\x02\x04\x02\x00\x00\x00\xe4\x00\x00\x00\x24\x68\x6d\x74\x78"
	"\x04\x00\x00\x00\x00\x00\x01\x88\x00\x00\x00\x08\x6c\x6f\x63\x61"
	"\x00\x0c\x00\x00\x00\x00\x01\xb0\x00\x00\x00\x06\x6d\x61\x78\x70"
	"\x00\x04\x00\x05\x00\x00\x01\x08\x00\x00\x00\x20\x6e\x61\x6d\x65"
	"\xf2\xeb\x16\xda\x00\x00\x01\xd0\x00\x00\x00\x4b\x70\x6f\x73\x74"
	"\x00\x01\x00\x01\x00\x00\x02\x1c\x00\x00\x00\x20\x00\x01\x00\x00"
	"\x00\x01\x00\x00\xb0\x94\x71\x10\x5f\x0f\x3c\xf5\x04\x07\x08\x00"
	"\x00\x00\x00\x00\xcf\x9a\xfc\x6e\x00\x00\x00\x00\xd4\xc3\xa7\xf2"
	"\x00\x00\x00\x00\x04\x00\x08\x00\x00\x00\x00\x10\x00\x02\x00\x00"
	"\x00\x00\x00\x00\x00\x01\x00\x00\x08\x00\xff\xff\x00\x00\x04\x00"
	"\x00\x00\x00\x00\x04\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x00\x00\x02\x00\x04"
	"\x00\x01\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x01\x90\x00\x05"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x01\x00\x01\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x47\x4f\x4f\x47\x00\x40\x00\x00\x00\x00\x00\x01\xff\xff"
	"\x00\x00\x00\x01\x00\x01\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00"
	"\x00\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x14\x00\x03\x00\x00"
	"\x00\x00\x00\x14\x00\x06\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x01\x00\x00\x00\x00\x04\x00"
	"\x08\x00\x00\x03\x00\x00\x31\x21\x11\x21\x04\x00\xfc\x00\x08\x00"
	"\x00\x00\x00\x03\x00\x2a\x00\x00\x00\x03\x00\x00\x00\x05\x00\x16"
	"\x00\x00\x00\x01\x00\x00\x00\x00\x00\x05\x00\x0b\x00\x16\x00\x03"
	"\x00\x01\x04\x09\x00\x05\x00\x16\x00\x00\x00\x56\x00\x65\x00\x72"
	"\x00\x73\x00\x69\x00\x6f\x00\x6e\x00\x20\x00\x31\x00\x2e\x00\x30"
	"\x56\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x30\x00\x00\x01\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\nendstream\nendobj\n";
151
152 #endif
153
154 fz_pdfocr_options *
155 fz_parse_pdfocr_options(fz_context *ctx, fz_pdfocr_options *opts, const char *args)
156 {
157 #ifdef OCR_DISABLED
158 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
159 #else
160 const char *val;
161
162 memset(opts, 0, sizeof *opts);
163
164 if (fz_has_option(ctx, args, "compression", &val))
165 {
166 if (fz_option_eq(val, "none"))
167 opts->compress = 0;
168 else if (fz_option_eq(val, "flate"))
169 opts->compress = 1;
170 else
171 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Unsupported PDFOCR compression %s (none, or flate only)", val);
172 }
173 if (fz_has_option(ctx, args, "strip-height", &val))
174 {
175 int i = fz_atoi(val);
176 if (i <= 0)
177 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Unsupported PDFOCR strip height %d (suggest 0)", i);
178 opts->strip_height = i;
179 }
180 if (fz_has_option(ctx, args, "ocr-language", &val))
181 {
182 fz_copy_option(ctx, val, opts->language, nelem(opts->language));
183 }
184 if (fz_has_option(ctx, args, "ocr-datadir", &val))
185 {
186 fz_copy_option(ctx, val, opts->datadir, nelem(opts->datadir));
187 }
188 if (fz_has_option(ctx, args, "skew", &val))
189 {
190 if (fz_option_eq(val, "auto"))
191 opts->skew_correct = 1;
192 else
193 {
194 opts->skew_correct = 2;
195 opts->skew_angle = fz_atof(val);
196 }
197 }
198 if (fz_has_option(ctx, args, "skew-border", &val))
199 {
200 if (fz_option_eq(val, "increase"))
201 opts->skew_border = 0;
202 else if (fz_option_eq(val, "maintain"))
203 opts->skew_border = 1;
204 else if (fz_option_eq(val, "decrease"))
205 opts->skew_border = 2;
206 else
207 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Unsupported skew-border option");
208 }
209
210 return opts;
211 #endif
212 }
213
/*
	Write a single pixmap to out as a one page PDFOCR file.

	Does nothing if either pixmap or out is NULL. pdfocr may be NULL, in
	which case fz_new_pdfocr_band_writer() uses zeroed options.

	The whole pixmap is sent as one band, and the band writer is closed
	and dropped before returning. Throws FZ_ERROR_UNSUPPORTED in builds
	without OCR support.
*/
void
fz_write_pixmap_as_pdfocr(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap, const fz_pdfocr_options *pdfocr)
{
#ifdef OCR_DISABLED
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
#else
	fz_band_writer *writer;

	if (!pixmap || !out)
		return;

	writer = fz_new_pdfocr_band_writer(ctx, out, pdfocr);
	fz_try(ctx)
	{
		fz_write_header(ctx, writer, pixmap->w, pixmap->h, pixmap->n, pixmap->alpha, pixmap->xres, pixmap->yres, 0, pixmap->colorspace, pixmap->seps);
		fz_write_band(ctx, writer, pixmap->stride, pixmap->h, pixmap->samples);
		fz_close_band_writer(ctx, writer);
	}
	fz_always(ctx)
		/* The writer is dropped on both the success and the error path. */
		fz_drop_band_writer(ctx, writer);
	fz_catch(ctx)
		fz_rethrow(ctx);
#endif
}
238
239 #ifndef OCR_DISABLED
/* State for the PDFOCR band writer. 'super' must be the first member so
 * that a fz_band_writer * can be cast to and from a pdfocr_band_writer *. */
typedef struct pdfocr_band_writer_s
{
	fz_band_writer super;
	fz_pdfocr_options options; /* Copy of the caller's options (zeroed if none given). */

	/* The actual output size */
	int deskewed_w;
	int deskewed_h;

	int obj_num; /* Number that the next call to new_obj() will return. */
	int xref_max; /* Allocated number of entries in xref. */
	int64_t *xref; /* File offset of each object, indexed by object number. */
	int pages; /* Number of pages started so far. */
	int page_max; /* Allocated number of entries in page_obj. */
	int *page_obj; /* Object number of each page object. */
	unsigned char *stripbuf; /* Holds one strip of image rows before it is written. */
	unsigned char *compbuf; /* Destination buffer for deflating a strip. */
	size_t complen; /* Size of compbuf in bytes. */

	fz_pixmap *skew_bitmap; /* Whole-page buffer, only used when skew correction is on. */

	void *tessapi; /* Handle returned by ocr_init(). */
	fz_pixmap *ocrbitmap; /* One byte per pixel copy of the page, handed to ocr_recognise(). */

	fz_pdfocr_progress_fn *progress; /* Optional progress callback; may be NULL. */
	void *progress_arg; /* Opaque argument passed to progress. */
} pdfocr_band_writer;
267
268 static int
269 new_obj(fz_context *ctx, pdfocr_band_writer *writer)
270 {
271 int64_t pos = fz_tell_output(ctx, writer->super.out);
272
273 if (writer->obj_num >= writer->xref_max)
274 {
275 int new_max = writer->xref_max * 2;
276 if (new_max < writer->obj_num + 8)
277 new_max = writer->obj_num + 8;
278 writer->xref = fz_realloc_array(ctx, writer->xref, new_max, int64_t);
279 writer->xref_max = new_max;
280 }
281
282 writer->xref[writer->obj_num] = pos;
283
284 return writer->obj_num++;
285 }
286
287 static void
288 post_skew_write_header(fz_context *ctx, pdfocr_band_writer *writer, int w, int h)
289 {
290 fz_output *out = writer->super.out;
291 int xres = writer->super.xres;
292 int yres = writer->super.yres;
293 int sh = writer->options.strip_height;
294 int n = writer->super.n;
295 int strips;
296 int i;
297
298 if (sh == 0)
299 sh = h;
300 assert(sh != 0 && "pdfocr_write_header() should not be given zero height input.");
301 strips = (h + sh-1)/sh;
302
303 writer->deskewed_w = w;
304 writer->deskewed_h = h;
305
306 writer->stripbuf = Memento_label(fz_malloc(ctx, (size_t)w * sh * n), "pdfocr_stripbuf");
307 writer->complen = fz_deflate_bound(ctx, (size_t)w * sh * n);
308 writer->compbuf = Memento_label(fz_malloc(ctx, writer->complen), "pdfocr_compbuf");
309
310 /* Always round the width of ocrbitmap up to a multiple of 4. */
311 writer->ocrbitmap = fz_new_pixmap(ctx, NULL, (w+3)&~3, h, NULL, 0);
312 fz_set_pixmap_resolution(ctx, writer->ocrbitmap, xres, yres);
313
314 /* Send the Page Object */
315 fz_write_printf(ctx, out, "%d 0 obj\n<</Type/Page/Parent 2 0 R/Resources<</XObject<<", new_obj(ctx, writer));
316 for (i = 0; i < strips; i++)
317 fz_write_printf(ctx, out, "/I%d %d 0 R", i, writer->obj_num + i);
318 fz_write_printf(ctx, out, ">>/Font<</F0 3 0 R>>>>/MediaBox[0 0 %g %g]/Contents %d 0 R>>\nendobj\n",
319 w * 72.0f / xres, h * 72.0f / yres, writer->obj_num + strips);
320 }
321
322 static void
323 pdfocr_write_header(fz_context *ctx, fz_band_writer *writer_, fz_colorspace *cs)
324 {
325 pdfocr_band_writer *writer = (pdfocr_band_writer *)writer_;
326 fz_output *out = writer->super.out;
327 int w = writer->super.w;
328 int h = writer->super.h;
329 int n = writer->super.n;
330 int s = writer->super.s;
331 int a = writer->super.alpha;
332 int sh = writer->options.strip_height;
333
334 if (sh == 0)
335 sh = h;
336 assert(sh != 0 && "pdfocr_write_header() should not be given zero height input.");
337
338 if (a != 0)
339 fz_throw(ctx, FZ_ERROR_ARGUMENT, "PDFOCR cannot write alpha channel");
340 if (s != 0)
341 fz_throw(ctx, FZ_ERROR_ARGUMENT, "PDFOCR cannot write spot colors");
342 if (n != 3 && n != 1)
343 fz_throw(ctx, FZ_ERROR_ARGUMENT, "PDFOCR expected to be Grayscale or RGB");
344
345 fz_free(ctx, writer->stripbuf);
346 writer->stripbuf = NULL;
347 fz_free(ctx, writer->compbuf);
348 writer->compbuf = NULL;
349 fz_drop_pixmap(ctx, writer->ocrbitmap);
350 writer->ocrbitmap = NULL;
351
352 /* Send the file header on the first page */
353 if (writer->pages == 0)
354 {
355 fz_write_string(ctx, out, "%PDF-1.4\n%PDFOCR-1.0\n");
356
357 if (writer->xref_max < 9)
358 {
359 int new_max = 9;
360 writer->xref = fz_realloc_array(ctx, writer->xref, new_max, int64_t);
361 writer->xref_max = new_max;
362 }
363 writer->xref[3] = fz_tell_output(ctx, out);
364 fz_write_data(ctx, out, funky_font, sizeof(funky_font)-1);
365 writer->xref[4] = fz_tell_output(ctx, out);
366 fz_write_data(ctx, out, funky_font2, sizeof(funky_font2)-1);
367 writer->xref[5] = fz_tell_output(ctx, out);
368 fz_write_data(ctx, out, funky_font3, sizeof(funky_font3)-1);
369 writer->xref[6] = fz_tell_output(ctx, out);
370 fz_write_data(ctx, out, funky_font4, sizeof(funky_font4)-1);
371 writer->xref[7] = fz_tell_output(ctx, out);
372 fz_write_data(ctx, out, funky_font5, sizeof(funky_font5)-1);
373 writer->xref[8] = fz_tell_output(ctx, out);
374 fz_write_data(ctx, out, funky_font6, sizeof(funky_font6)-1);
375 }
376
377 if (writer->page_max <= writer->pages)
378 {
379 int new_max = writer->page_max * 2;
380 if (new_max == 0)
381 new_max = writer->pages + 8;
382 writer->page_obj = fz_realloc_array(ctx, writer->page_obj, new_max, int);
383 writer->page_max = new_max;
384 }
385 writer->page_obj[writer->pages] = writer->obj_num;
386 writer->pages++;
387
388 if (writer->options.skew_correct)
389 writer->skew_bitmap = fz_new_pixmap(ctx, n == 3 ? fz_device_rgb(ctx) : fz_device_gray(ctx), w, h, NULL, 0);
390 else
391 post_skew_write_header(ctx, writer, w, h);
392 }
393
394 static void
395 flush_strip(fz_context *ctx, pdfocr_band_writer *writer, int fill)
396 {
397 unsigned char *data = writer->stripbuf;
398 fz_output *out = writer->super.out;
399 int w = writer->deskewed_w;
400 int n = writer->super.n;
401 size_t len = (size_t)w*n*fill;
402
403 /* Buffer is full, compress it and write it. */
404 if (writer->options.compress)
405 {
406 size_t destLen = writer->complen;
407 fz_deflate(ctx, writer->compbuf, &destLen, data, len, FZ_DEFLATE_DEFAULT);
408 len = destLen;
409 data = writer->compbuf;
410 }
411 fz_write_printf(ctx, out, "%d 0 obj\n<</Width %d/ColorSpace/Device%s/Height %d%s/Subtype/Image",
412 new_obj(ctx, writer), w, n == 1 ? "Gray" : "RGB", fill, writer->options.compress ? "/Filter/FlateDecode" : "");
413 fz_write_printf(ctx, out, "/Length %zd/Type/XObject/BitsPerComponent 8>>\nstream\n", len);
414 fz_write_data(ctx, out, data, len);
415 fz_write_string(ctx, out, "\nendstream\nendobj\n");
416 }
417
418 static void
419 post_skew_write_band(fz_context *ctx, pdfocr_band_writer *writer, int stride, int band_start, int band_height, const unsigned char *sp)
420 {
421 int w = writer->deskewed_w;
422 int h = writer->deskewed_h;
423 int n = writer->super.n;
424 int x, y;
425 int sh = writer->options.strip_height;
426 int line;
427 unsigned char *d;
428
429 if (sh == 0)
430 sh = h;
431
432 for (line = 0; line < band_height; line++)
433 {
434 int dstline = (band_start+line) % sh;
435 memcpy(writer->stripbuf + (size_t)w*n*dstline,
436 sp + (size_t)line * w * n,
437 (size_t)w * n);
438 if (dstline+1 == sh)
439 flush_strip(ctx, writer, dstline+1);
440 }
441 if (band_start + band_height == h && h % sh != 0)
442 flush_strip(ctx, writer, h % sh);
443
444 /* Copy strip to ocrbitmap, converting if required. */
445 d = writer->ocrbitmap->samples;
446 d += band_start*w;
447 if (n == 1)
448 {
449 for (y = band_height; y > 0; y--)
450 {
451 memcpy(d, sp, w);
452 if (writer->ocrbitmap->w - w)
453 memset(d + w, 0, writer->ocrbitmap->w - w);
454 d += writer->ocrbitmap->w;
455 }
456 }
457 else
458 {
459 for (y = band_height; y > 0; y--)
460 {
461 for (x = w; x > 0; x--)
462 {
463 *d++ = (sp[0] + 2*sp[1] + sp[2] + 2)>>2;
464 sp += 3;
465 }
466 for (x = writer->ocrbitmap->w - w; x > 0; x--)
467 *d++ = 0;
468 }
469 }
470 }
471
472 static void
473 pdfocr_write_band(fz_context *ctx, fz_band_writer *writer_, int stride, int band_start, int band_height, const unsigned char *sp)
474 {
475 pdfocr_band_writer *writer = (pdfocr_band_writer *)writer_;
476 fz_output *out = writer->super.out;
477 int w = writer->super.w;
478 int n = writer->super.n;
479 unsigned char *d;
480
481 if (!out)
482 return;
483
484 if (writer->skew_bitmap)
485 {
486 d = writer->skew_bitmap->samples;
487 d += band_start*w*n;
488 memcpy(d, sp, w*n*band_height);
489 }
490 else
491 post_skew_write_band(ctx, writer, stride, band_start, band_height, sp);
492 }
493
/* Bit flags recording the directions of travel seen between the
 * characters of a word (and, ORed together, of a line). */
enum
{
	WORD_CONTAINS_L2R = 1, /* left to right */
	WORD_CONTAINS_R2L = 2, /* right to left */
	WORD_CONTAINS_T2B = 4, /* top to bottom */
	WORD_CONTAINS_B2T = 8 /* bottom to top */
};
501
/* A completed word waiting on the current line. Allocated by queue_word()
 * with room for 'len' characters, and freed by flush_words(). */
typedef struct word_t
{
	struct word_t *next; /* Next word on the line, or NULL. */
	float bbox[4]; /* x0, y0, x1, y1 as computed in char_callback(). */
	int dirn; /* Combination of WORD_CONTAINS_* flags. */
	int len; /* Number of entries in chars. */
	int chars[FZ_FLEXIBLE_ARRAY]; /* Character codes, in the order received. */
} word_t;
510
/* State shared between char_callback(), queue_word() and flush_words()
 * while the text layer of one page is being generated. */
typedef struct
{
	fz_buffer *buf; /* The page contents stream being built. */
	pdfocr_band_writer *writer;

	/* We collate the current word into the following fields: */
	int word_max; /* Allocated size of word_chars. */
	int word_len; /* Number of characters collected so far. */
	int *word_chars;
	float word_bbox[4];
	int word_dirn; /* WORD_CONTAINS_* flags seen in this word. */
	int word_prev_char_bbox[4]; /* Bbox of the word's first character. */

	/* When we finish a word, we try to add it to the line. If the
	 * word fits onto the end of the existing line, great. If not,
	 * we flush the entire line, and start a new one just with the
	 * new word. This enables us to output a whole line at once,
	 * which is beneficial to avoid jittering the font sizes
	 * up/down, which looks bad when we try to select text in the
	 * produced PDF. */
	word_t *line; /* Head of the list of queued words. */
	word_t **line_tail; /* Where the next queued word gets linked in. */
	float line_bbox[4]; /* Union of the bboxes of the queued words. */
	int line_dirn; /* Union of the dirn flags of the queued words. */

	/* The text state most recently written to buf, so that redundant
	 * operators can be skipped. */
	float cur_size; /* Last font size sent with Tf. */
	float cur_scale; /* Last horizontal scaling sent with Tz. */
	float tx, ty; /* Last position sent with Td. */
} char_callback_data_t;
540
/* Write every word queued on the current line to cb->buf as PDF text
 * operators, freeing each word as it is written, then reset the line
 * state ready for the next line. Does nothing if no words are queued.
 *
 * Font size (Tf), horizontal scaling (Tz) and position (Td) are only
 * written when they differ from the values last written. */
static void
flush_words(fz_context *ctx, char_callback_data_t *cb)
{
	float size;

	if (cb->line == NULL)
		return;

	if ((cb->line_dirn & (WORD_CONTAINS_T2B | WORD_CONTAINS_B2T)) != 0)
	{
		/* Vertical line */
		/* The size is worked out per word inside the loop below. */
	}
	else
	{
		/* Horizontal line */
		/* One font size for the whole line: the height of the line bbox. */
		size = cb->line_bbox[3] - cb->line_bbox[1];

		if (size != 0 && size != cb->cur_size)
		{
			fz_append_printf(ctx, cb->buf, "/F0 %g Tf\n", size);
			cb->cur_size = size;
		}
		/* Guard against division by 0. This makes no difference to the
		 * actual calculation as if size is 0, word->bbox[2] == word->bbox[0]
		 * too. */
		if (size == 0)
			size = 1;
	}

	while (cb->line)
	{
		word_t *word = cb->line;
		float x, y;
		int i, len = word->len;
		float scale;

		if ((cb->line_dirn & (WORD_CONTAINS_T2B | WORD_CONTAINS_B2T)) != 0)
		{
			/* Contains vertical text. */
			/* Font size is the share of the word's height taken by each character. */
			size = (word->bbox[3] - word->bbox[1]) / len;
			if (size == 0)
				size = 1;
			if (size != cb->cur_size)
			{
				fz_append_printf(ctx, cb->buf, "/F0 %g Tf\n", size);
				cb->cur_size = size;
			}

			/* Set the scale so that our glyphs fill the line bbox. */
			scale = (cb->line_bbox[2] - cb->line_bbox[0]) / size * 200;
			if (scale != 0)
			{
				float letter_height = (word->bbox[3] - word->bbox[1]) / len;

				if (scale != cb->cur_scale)
				{
					fz_append_printf(ctx, cb->buf, "%d Tz\n", (int)scale);
					cb->cur_scale = scale;
				}

				/* Each character is positioned and shown on its own. */
				for (i = 0; i < len; i++)
				{
					x = word->bbox[0];
					y = word->bbox[1] + letter_height * i;
					/* Td is relative, so send the offset from the last position. */
					fz_append_printf(ctx, cb->buf, "%g %g Td\n", x-cb->tx, y-cb->ty);
					cb->tx = x;
					cb->ty = y;

					fz_append_printf(ctx, cb->buf, "<%04x>Tj\n", word->chars[i]);
				}
			}
		}
		else
		{
			/* Scale so that len glyphs span the width of the word bbox.
			 * A zero width word gives a zero scale and is not output. */
			scale = (word->bbox[2] - word->bbox[0]) / size / len * 200;
			if (scale != 0)
			{
				if (scale != cb->cur_scale)
				{
					fz_append_printf(ctx, cb->buf, "%d Tz\n", (int)scale);
					cb->cur_scale = scale;
				}

				if ((word->dirn & (WORD_CONTAINS_R2L | WORD_CONTAINS_L2R)) == WORD_CONTAINS_R2L)
				{
					/* Purely R2L text */
					x = word->bbox[0];
					y = cb->line_bbox[1];
					fz_append_printf(ctx, cb->buf, "%g %g Td\n", x-cb->tx, y-cb->ty);
					cb->tx = x;
					cb->ty = y;

					/* Tesseract has sent us R2L text in R2L order (i.e. in Logical order).
					 * We want to output it in that same logical order, but PDF operators
					 * all move the point as if outputting L2R. We can either reverse the
					 * order of chars (bad, because of cut/paste) or we can perform
					 * gymnastics with the position. We opt for the latter. */
					fz_append_printf(ctx, cb->buf, "[");
					for (i = 0; i < len; i++)
					{
						if (i == 0)
						{
							/* Adjustment written before the first glyph. */
							if (len > 1)
								fz_append_printf(ctx, cb->buf, "%d", -500*(len-1));
						}
						else
							/* Adjustment written before every later glyph. */
							fz_append_printf(ctx, cb->buf, "%d", 1000);
						fz_append_printf(ctx, cb->buf, "<%04x>", word->chars[i]);
					}
					fz_append_printf(ctx, cb->buf, "]TJ\n");
				}
				else
				{
					/* L2R (or mixed) text */
					x = word->bbox[0];
					y = cb->line_bbox[1];
					fz_append_printf(ctx, cb->buf, "%g %g Td\n", x-cb->tx, y-cb->ty);
					cb->tx = x;
					cb->ty = y;

					/* The whole word goes out as a single hex string. */
					fz_append_printf(ctx, cb->buf, "<");
					for (i = 0; i < len; i++)
						fz_append_printf(ctx, cb->buf, "%04x", word->chars[i]);
					fz_append_printf(ctx, cb->buf, ">Tj\n");
				}
			}
		}

		/* Unlink the word before freeing it, so that the list stays
		 * valid if a later append throws. */
		cb->line = word->next;
		fz_free(ctx, word);
	}

	cb->line_tail = &cb->line;
	cb->line = NULL;
	cb->line_dirn = 0;
}
677
678 static void
679 queue_word(fz_context *ctx, char_callback_data_t *cb)
680 {
681 word_t *word;
682 int line_is_v, line_is_h, word_is_v, word_is_h;
683
684 if (cb->word_len == 0)
685 return;
686
687 word = fz_malloc_flexible(ctx, word_t, chars, cb->word_len);
688 word->next = NULL;
689 word->len = cb->word_len;
690 memcpy(word->bbox, cb->word_bbox, 4*sizeof(float));
691 memcpy(word->chars, cb->word_chars, cb->word_len * sizeof(int));
692 cb->word_len = 0;
693
694 line_is_v = !!(cb->line_dirn & (WORD_CONTAINS_B2T | WORD_CONTAINS_T2B));
695 word_is_v = !!(cb->word_dirn & (WORD_CONTAINS_B2T | WORD_CONTAINS_T2B));
696 line_is_h = !!(cb->line_dirn & (WORD_CONTAINS_L2R | WORD_CONTAINS_R2L));
697 word_is_h = !!(cb->word_dirn & (WORD_CONTAINS_L2R | WORD_CONTAINS_R2L));
698
699 word->dirn = cb->word_dirn;
700 cb->word_dirn = 0;
701
702 /* Can we put the new word onto the end of the existing line? */
703 if (cb->line != NULL &&
704 !line_is_v && !word_is_v &&
705 word->bbox[1] <= cb->line_bbox[3] &&
706 word->bbox[3] >= cb->line_bbox[1] &&
707 (word->bbox[0] >= cb->line_bbox[2] || word->bbox[2] <= cb->line_bbox[0]))
708 {
709 /* Can append (horizontal motion). */
710 if (word->bbox[0] < cb->line_bbox[0])
711 cb->line_bbox[0] = word->bbox[0];
712 if (word->bbox[1] < cb->line_bbox[1])
713 cb->line_bbox[1] = word->bbox[1];
714 if (word->bbox[2] > cb->line_bbox[2])
715 cb->line_bbox[2] = word->bbox[2];
716 if (word->bbox[3] > cb->line_bbox[3])
717 cb->line_bbox[3] = word->bbox[3];
718 }
719 else if (cb->line != NULL &&
720 !line_is_h && !word_is_h &&
721 word->bbox[0] <= cb->line_bbox[2] &&
722 word->bbox[2] >= cb->line_bbox[0] &&
723 (word->bbox[1] >= cb->line_bbox[3] || word->bbox[3] <= cb->line_bbox[1]))
724 {
725 /* Can append (vertical motion). */
726 if (!word_is_v)
727 word->dirn |= WORD_CONTAINS_T2B;
728 if (word->bbox[0] < cb->line_bbox[0])
729 cb->line_bbox[0] = word->bbox[0];
730 if (word->bbox[1] < cb->line_bbox[1])
731 cb->line_bbox[1] = word->bbox[1];
732 if (word->bbox[2] > cb->line_bbox[2])
733 cb->line_bbox[2] = word->bbox[2];
734 if (word->bbox[3] > cb->line_bbox[3])
735 cb->line_bbox[3] = word->bbox[3];
736 }
737 else
738 {
739 fz_try(ctx)
740 flush_words(ctx, cb);
741 fz_catch(ctx)
742 {
743 fz_free(ctx, word);
744 fz_rethrow(ctx);
745 }
746 memcpy(cb->line_bbox, word->bbox, 4*sizeof(float));
747 }
748
749 *cb->line_tail = word;
750 cb->line_tail = &word->next;
751 cb->line_dirn |= word->dirn;
752 }
753
754 static void
755 char_callback(fz_context *ctx, void *arg, int unicode,
756 const char *font_name,
757 const int *line_bbox, const int *word_bbox,
758 const int *char_bbox, int pointsize)
759 {
760 char_callback_data_t *cb = (char_callback_data_t *)arg;
761 pdfocr_band_writer *writer = cb->writer;
762 float bbox[4];
763
764 bbox[0] = word_bbox[0] * 72.0f / cb->writer->ocrbitmap->xres;
765 bbox[3] = (writer->ocrbitmap->h - 1 - word_bbox[1]) * 72.0f / cb->writer->ocrbitmap->yres;
766 bbox[2] = word_bbox[2] * 72.0f / cb->writer->ocrbitmap->yres;
767 bbox[1] = (writer->ocrbitmap->h - 1 - word_bbox[3]) * 72.0f / cb->writer->ocrbitmap->yres;
768
769 if (bbox[0] != cb->word_bbox[0] ||
770 bbox[1] != cb->word_bbox[1] ||
771 bbox[2] != cb->word_bbox[2] ||
772 bbox[3] != cb->word_bbox[3])
773 {
774 queue_word(ctx, cb);
775 memcpy(cb->word_bbox, bbox, 4 * sizeof(float));
776 }
777
778 if (cb->word_len == 0)
779 {
780 cb->word_dirn = 0;
781 memcpy(cb->word_prev_char_bbox, char_bbox, 4 * sizeof(int));
782 }
783 else
784 {
785 int ox = cb->word_prev_char_bbox[0] + cb->word_prev_char_bbox[2];
786 int oy = cb->word_prev_char_bbox[1] + cb->word_prev_char_bbox[3];
787 int x = char_bbox[0] + char_bbox[2] - ox;
788 int y = char_bbox[1] + char_bbox[3] - oy;
789 int ax = x < 0 ? -x : x;
790 int ay = y < 0 ? -y : y;
791 if (ax > ay)
792 {
793 if (x > 0)
794 cb->word_dirn |= WORD_CONTAINS_L2R;
795 else if (x < 0)
796 cb->word_dirn |= WORD_CONTAINS_R2L;
797 }
798 else if (ay < ax)
799 {
800 if (y > 0)
801 cb->word_dirn |= WORD_CONTAINS_T2B;
802 else if (y < 0)
803 cb->word_dirn |= WORD_CONTAINS_B2T;
804 }
805 }
806
807 if (cb->word_max == cb->word_len)
808 {
809 int newmax = cb->word_max * 2;
810 if (newmax == 0)
811 newmax = 16;
812 cb->word_chars = fz_realloc_array(ctx, cb->word_chars, newmax, int);
813 cb->word_max = newmax;
814 }
815
816 cb->word_chars[cb->word_len++] = unicode;
817 }
818
819 static int
820 pdfocr_progress(fz_context *ctx, void *arg, int prog)
821 {
822 char_callback_data_t *cb = (char_callback_data_t *)arg;
823 pdfocr_band_writer *writer = cb->writer;
824
825 if (writer->progress == NULL)
826 return 0;
827
828 return writer->progress(ctx, writer->progress_arg, writer->pages - 1, prog);
829 }
830
/* Deskew the page buffered in skew_bitmap and feed the result through
 * the normal header and band code as a single band covering the page.
 *
 * When skew_correct is 1 the angle is detected from the bitmap and stored
 * in the options; otherwise the angle already in the options is used. */
static void
do_skew_correct(fz_context *ctx, pdfocr_band_writer *writer)
{
	fz_pixmap *deskewed;

	if (writer->options.skew_correct == 1)
		writer->options.skew_angle = fz_detect_skew(ctx, writer->skew_bitmap);

	deskewed = fz_deskew_pixmap(ctx, writer->skew_bitmap, writer->options.skew_angle, writer->options.skew_border);

	fz_try(ctx)
	{
		/* The deskewed pixmap's own size is what gets written. */
		post_skew_write_header(ctx, writer, deskewed->w, deskewed->h);
		post_skew_write_band(ctx, writer, deskewed->stride, 0, deskewed->h, deskewed->samples);
	}
	fz_always(ctx)
		fz_drop_pixmap(ctx, deskewed);
	fz_catch(ctx)
		fz_rethrow(ctx);
}
851
852 static void
853 pdfocr_write_trailer(fz_context *ctx, fz_band_writer *writer_)
854 {
855 pdfocr_band_writer *writer = (pdfocr_band_writer *)writer_;
856 fz_output *out = writer->super.out;
857 int xres = writer->super.xres;
858 int yres = writer->super.yres;
859 int sh = writer->options.strip_height;
860 int strips;
861 int w, h, i;
862 size_t len;
863 unsigned char *data;
864 fz_buffer *buf = NULL;
865 char_callback_data_t cb = { NULL };
866
867 if (writer->options.skew_correct)
868 do_skew_correct(ctx, writer);
869
870 w = writer->deskewed_w;
871 h = writer->deskewed_h;
872 if (sh == 0)
873 sh = h;
874 strips = (h + sh-1)/sh;
875
876 /* Send the Page contents */
877 /* We need the length to this, so write to a buffer first */
878 fz_var(buf);
879 fz_var(cb);
880 fz_try(ctx)
881 {
882 cb.writer = writer;
883 cb.buf = buf = fz_new_buffer(ctx, 0);
884 cb.line_tail = &cb.line;
885 cb.word_dirn = 0;
886 cb.line_dirn = 0;
887 fz_append_printf(ctx, buf, "q\n%g 0 0 %g 0 0 cm\n", 72.0f/xres, 72.0f/yres);
888 for (i = 0; i < strips; i++)
889 {
890 int at = h - (i+1)*sh;
891 int this_sh = sh;
892 if (at < 0)
893 {
894 this_sh += at;
895 at = 0;
896 }
897 fz_append_printf(ctx, buf, "/P <</MCID 0>> BDC\nq\n%d 0 0 %d 0 %d cm\n/I%d Do\nQ\n",
898 w, this_sh, at, i);
899 }
900
901 fz_append_printf(ctx, buf, "Q\nBT\n3 Tr\n");
902
903 ocr_recognise(ctx, writer->tessapi, writer->ocrbitmap, char_callback, pdfocr_progress, &cb);
904 queue_word(ctx, &cb);
905 flush_words(ctx, &cb);
906 fz_append_printf(ctx, buf, "ET\n");
907
908 len = fz_buffer_storage(ctx, buf, &data);
909 fz_write_printf(ctx, out, "%d 0 obj\n<</Length %zd>>\nstream\n", new_obj(ctx, writer), len);
910 fz_write_data(ctx, out, data, len);
911 fz_drop_buffer(ctx, buf);
912 buf = NULL;
913 fz_write_string(ctx, out, "\nendstream\nendobj\n");
914 }
915 fz_always(ctx)
916 {
917 fz_free(ctx, cb.word_chars);
918 }
919 fz_catch(ctx)
920 {
921 fz_drop_buffer(ctx, buf);
922 fz_rethrow(ctx);
923 }
924 }
925
926 static void
927 pdfocr_close_band_writer(fz_context *ctx, fz_band_writer *writer_)
928 {
929 pdfocr_band_writer *writer = (pdfocr_band_writer *)writer_;
930 fz_output *out = writer->super.out;
931 int i;
932
933 /* We actually do the trailer writing in the close */
934 if (writer->xref_max > 2)
935 {
936 int64_t t_pos;
937
938 /* Catalog */
939 writer->xref[1] = fz_tell_output(ctx, out);
940 fz_write_printf(ctx, out, "1 0 obj\n<</Type/Catalog/Pages 2 0 R>>\nendobj\n");
941
942 /* Page table */
943 writer->xref[2] = fz_tell_output(ctx, out);
944 fz_write_printf(ctx, out, "2 0 obj\n<</Count %d/Kids[", writer->pages);
945
946 for (i = 0; i < writer->pages; i++)
947 {
948 if (i > 0)
949 fz_write_byte(ctx, out, ' ');
950 fz_write_printf(ctx, out, "%d 0 R", writer->page_obj[i]);
951 }
952 fz_write_string(ctx, out, "]/Type/Pages>>\nendobj\n");
953
954 /* Xref */
955 t_pos = fz_tell_output(ctx, out);
956 fz_write_printf(ctx, out, "xref\n0 %d\n0000000000 65535 f \n", writer->obj_num);
957 for (i = 1; i < writer->obj_num; i++)
958 fz_write_printf(ctx, out, "%010ld 00000 n \n", writer->xref[i]);
959 fz_write_printf(ctx, out, "trailer\n<</Size %d/Root 1 0 R>>\nstartxref\n%ld\n%%%%EOF\n", writer->obj_num, t_pos);
960 }
961 }
962
963 static void
964 pdfocr_drop_band_writer(fz_context *ctx, fz_band_writer *writer_)
965 {
966 pdfocr_band_writer *writer = (pdfocr_band_writer *)writer_;
967 fz_free(ctx, writer->stripbuf);
968 fz_free(ctx, writer->compbuf);
969 fz_free(ctx, writer->page_obj);
970 fz_free(ctx, writer->xref);
971 fz_drop_pixmap(ctx, writer->ocrbitmap);
972 ocr_fin(ctx, writer->tessapi);
973 }
974 #endif
975
/*
	Create a band writer that produces PDFOCR output on out.

	options may be NULL, in which case zeroed options are used. The
	options are copied, so the caller's structure need not outlive the
	writer.

	The OCR engine is initialised here; if that fails the partly built
	writer is dropped and the error rethrown. Throws FZ_ERROR_UNSUPPORTED
	in builds without OCR support.
*/
fz_band_writer *fz_new_pdfocr_band_writer(fz_context *ctx, fz_output *out, const fz_pdfocr_options *options)
{
#ifdef OCR_DISABLED
	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
#else
	pdfocr_band_writer *writer = fz_new_band_writer(ctx, pdfocr_band_writer, out);

	writer->super.header = pdfocr_write_header;
	writer->super.band = pdfocr_write_band;
	writer->super.trailer = pdfocr_write_trailer;
	writer->super.close = pdfocr_close_band_writer;
	writer->super.drop = pdfocr_drop_band_writer;

	if (options)
		writer->options = *options;
	else
		memset(&writer->options, 0, sizeof(writer->options));

	/* Objects:
	 * 1 reserved for catalog
	 * 2 for pages tree
	 * 3 font
	 * 4 cidfont
	 * 5 cid to gid map
	 * 6 tounicode
	 * 7 font descriptor
	 * 8 font file
	 */
	/* So the first object handed out by new_obj() is number 9. */
	writer->obj_num = 9;

	fz_try(ctx)
	{
		writer->tessapi = ocr_init(ctx, writer->options.language, writer->options.datadir);
	}
	fz_catch(ctx)
	{
		fz_drop_band_writer(ctx, &writer->super);
		fz_rethrow(ctx);
	}

	return &writer->super;
#endif
}
1019
1020 void
1021 fz_pdfocr_band_writer_set_progress(fz_context *ctx, fz_band_writer *writer_, fz_pdfocr_progress_fn *progress, void *progress_arg)
1022 {
1023 #ifdef OCR_DISABLED
1024 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
1025 #else
1026 pdfocr_band_writer *writer = (pdfocr_band_writer *)writer_;
1027 if (writer == NULL)
1028 return;
1029 if (writer->super.header != pdfocr_write_header)
1030 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Not a pdfocr band writer!");
1031
1032 writer->progress = progress;
1033 writer->progress_arg = progress_arg;
1034 #endif
1035 }
1036
1037 void
1038 fz_save_pixmap_as_pdfocr(fz_context *ctx, fz_pixmap *pixmap, char *filename, int append, const fz_pdfocr_options *pdfocr)
1039 {
1040 #ifdef OCR_DISABLED
1041 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
1042 #else
1043 fz_output *out = fz_new_output_with_path(ctx, filename, append);
1044 fz_try(ctx)
1045 {
1046 fz_write_pixmap_as_pdfocr(ctx, out, pixmap, pdfocr);
1047 fz_close_output(ctx, out);
1048 }
1049 fz_always(ctx)
1050 fz_drop_output(ctx, out);
1051 fz_catch(ctx)
1052 fz_rethrow(ctx);
1053 #endif
1054 }
1055
1056 /* High-level document writer interface */
1057
1058 #ifndef OCR_DISABLED
1059 typedef struct
1060 {
1061 fz_document_writer super;
1062 fz_draw_options draw;
1063 fz_pdfocr_options pdfocr;
1064 fz_pixmap *pixmap;
1065 fz_band_writer *bander;
1066 fz_output *out;
1067 int pagenum;
1068 } fz_pdfocr_writer;
1069
1070 static fz_device *
1071 pdfocr_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
1072 {
1073 fz_pdfocr_writer *wri = (fz_pdfocr_writer*)wri_;
1074 return fz_new_draw_device_with_options(ctx, &wri->draw, mediabox, &wri->pixmap);
1075 }
1076
1077 static void
1078 pdfocr_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
1079 {
1080 fz_pdfocr_writer *wri = (fz_pdfocr_writer*)wri_;
1081 fz_pixmap *pix = wri->pixmap;
1082
1083 fz_try(ctx)
1084 {
1085 fz_close_device(ctx, dev);
1086 fz_write_header(ctx, wri->bander, pix->w, pix->h, pix->n, pix->alpha, pix->xres, pix->yres, wri->pagenum++, pix->colorspace, pix->seps);
1087 fz_write_band(ctx, wri->bander, pix->stride, pix->h, pix->samples);
1088 }
1089 fz_always(ctx)
1090 {
1091 fz_drop_device(ctx, dev);
1092 fz_drop_pixmap(ctx, pix);
1093 wri->pixmap = NULL;
1094 }
1095 fz_catch(ctx)
1096 fz_rethrow(ctx);
1097 }
1098
1099 static void
1100 pdfocr_close_writer(fz_context *ctx, fz_document_writer *wri_)
1101 {
1102 fz_pdfocr_writer *wri = (fz_pdfocr_writer*)wri_;
1103
1104 fz_close_band_writer(ctx, wri->bander);
1105 fz_close_output(ctx, wri->out);
1106 }
1107
1108 static void
1109 pdfocr_drop_writer(fz_context *ctx, fz_document_writer *wri_)
1110 {
1111 fz_pdfocr_writer *wri = (fz_pdfocr_writer*)wri_;
1112
1113 fz_drop_pixmap(ctx, wri->pixmap);
1114 fz_drop_band_writer(ctx, wri->bander);
1115 fz_drop_output(ctx, wri->out);
1116 }
1117 #endif
1118
1119 fz_document_writer *
1120 fz_new_pdfocr_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
1121 {
1122 #ifdef OCR_DISABLED
1123 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
1124 #else
1125 fz_pdfocr_writer *wri = NULL;
1126
1127 fz_var(wri);
1128
1129 fz_try(ctx)
1130 {
1131 wri = fz_new_derived_document_writer(ctx, fz_pdfocr_writer, pdfocr_begin_page, pdfocr_end_page, pdfocr_close_writer, pdfocr_drop_writer);
1132 fz_parse_draw_options(ctx, &wri->draw, options);
1133 fz_parse_pdfocr_options(ctx, &wri->pdfocr, options);
1134 wri->out = out;
1135 wri->bander = fz_new_pdfocr_band_writer(ctx, wri->out, &wri->pdfocr);
1136 }
1137 fz_catch(ctx)
1138 {
1139 fz_drop_output(ctx, out);
1140 fz_free(ctx, wri);
1141 fz_rethrow(ctx);
1142 }
1143
1144 return (fz_document_writer*)wri;
1145 #endif
1146 }
1147
1148 fz_document_writer *
1149 fz_new_pdfocr_writer(fz_context *ctx, const char *path, const char *options)
1150 {
1151 #ifdef OCR_DISABLED
1152 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
1153 #else
1154 fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.pdfocr", 0);
1155 return fz_new_pdfocr_writer_with_output(ctx, out, options);
1156 #endif
1157 }
1158
1159 void
1160 fz_pdfocr_writer_set_progress(fz_context *ctx, fz_document_writer *writer, fz_pdfocr_progress_fn *progress, void *progress_arg)
1161 {
1162 #ifdef OCR_DISABLED
1163 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "No OCR support in this build");
1164 #else
1165 fz_pdfocr_writer *wri = (fz_pdfocr_writer *)writer;
1166 if (!writer)
1167 return;
1168 if (writer->begin_page != pdfocr_begin_page)
1169 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Not a pdfocr writer!");
1170 fz_pdfocr_band_writer_set_progress(ctx, wri->bander, progress, progress_arg);
1171 #endif
1172 }