Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/ocr-device.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/ocr-device.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1205 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +#include <assert.h> +#include <string.h> +#include <errno.h> + +#undef DEBUG_OCR + +#ifndef OCR_DISABLED +#include "tessocr.h" + +/* + +This device can be used in 2 modes, with or without a list. + +In both modes the OCR device is created with a target device. The +caller runs the page to the device, and the device processes the calls +and (eventually) calls through to the target. + +In both modes, all incoming calls are forwarded to an internal draw +device to render the page, so the page rendering is always complete. +The incoming calls are also forwarded (mostly, eventually) to the +target. Where the 2 modes differ is in the timing/content of those +forwarded calls. + +In the first mode (without a list), the device instantly forwards all +non-text calls to the target. When the OCR device is closed, an OCR pass +is performed, and the recovered text is forwarded to the target. All +recovered text is listed as Courier, and ends up on top of the content. + +This is fine for text extraction and probably for most cases of document +conversion. It's no good for correcting the unicode values within a +document though. + +So, we have concocted a second way of working, using a display list. In +this mode, as well as rendering every device call that comes in, it +forwards them to a display list (and not the target). When the device +is closed we OCR the text image, and store the results. We then play +the list back through a 'rewrite' device to the target. The rewrite +device rewrites the text objects with the correct unicode values. Any +characters given by the OCR pass that aren't used by the rewrite step +are then sent through as invisible text. + +This means that all the target device sees is the exact same graphical +objects in the exact same order, but with corrected unicode values. +Also, any text that appears in the document as a result of images or +line art is sent through as 'invisible' text at the end, so it will work +for cut/paste or search. + +Or, at least, that was the plan. Unfortunately, it turns out that +Tesseract (with the LSTM engine (the most modern one)) is really bad at +giving bounding boxes for characters. It seems that the neural network +can say "hey, there is an 'X'", but it can't actually say where the X +occurred within the word. So tesseract knows where the words are, and +knows the order of the letters within the word, but basically guesses +at bboxes for the letters. + +Because of this, we can't rely on character bboxes from tesseract to be +correct. We have to work off the word bboxes alone, together with the +order in which characters are passed to us. + +So, as Tesseract gives us data, we store the word bbox, together with +the list of chars within that word. + +When we play the list back through the display device, we then have to +rewrite text objects based on which word they are in. For the first +version, we'll make the extremely dodgy assumption that characters +come in the same order within the word. + +For future versions we may want to collect bboxes for each text char +on our initial list building pass, collate those into matching 'words' +and sort them accordingly. +*/ + + +typedef struct word_record_s { + int len; + fz_rect bbox; + int n; + int unicode[FZ_FLEXIBLE_ARRAY]; +} word_record; + +typedef struct fz_ocr_device_s +{ + fz_device super; + + /* Progress monitoring */ + int (*progress)(fz_context *, void *, int progress); + void *progress_arg; + + fz_device *target; + fz_display_list *list; + fz_device *list_dev; + fz_device *draw_dev; + fz_pixmap *pixmap; + + fz_rect mediabox; + fz_matrix ctm; + + fz_rect word_bbox; + fz_font *font; + + /* Current word */ + int char_max; + int char_len; + int *chars; + + /* Entire page */ + int words_max; + int words_len; + word_record **words; + + char *language; + char *datadir; +} fz_ocr_device; + +static void +fz_ocr_fill_path(fz_context *ctx, fz_device *dev, const fz_path *path, int even_odd, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_fill_path(ctx, ocr->list_dev, path, even_odd, ctm, colorspace, color, alpha, color_params); + fz_fill_path(ctx, ocr->draw_dev, path, even_odd, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_ocr_stroke_path(fz_context *ctx, fz_device *dev, const fz_path *path, const fz_stroke_state *stroke, + fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_stroke_path(ctx, ocr->list_dev, path, stroke, ctm, colorspace, color, alpha, color_params); + fz_stroke_path(ctx, ocr->draw_dev, path, stroke, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_ocr_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + if (ocr->list_dev != ocr->target) + fz_fill_text(ctx, ocr->list_dev, text, ctm, colorspace, color, alpha, color_params); + fz_fill_text(ctx, ocr->draw_dev, text, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_ocr_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, + fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + if (ocr->list_dev != ocr->target) + fz_stroke_text(ctx, ocr->list_dev, text, stroke, ctm, colorspace, color, alpha, color_params); + fz_stroke_text(ctx, ocr->draw_dev, text, stroke, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_ocr_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shade, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_fill_shade(ctx, ocr->list_dev, shade, ctm, alpha, color_params); + fz_fill_shade(ctx, ocr->draw_dev, shade, ctm, alpha, color_params); +} + +static void +fz_ocr_fill_image(fz_context *ctx, fz_device *dev, fz_image *image, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_fill_image(ctx, ocr->list_dev, image, ctm, alpha, color_params); + fz_fill_image(ctx, ocr->draw_dev, image, ctm, alpha, color_params); +} + +static void +fz_ocr_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *image, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_fill_image_mask(ctx, ocr->list_dev, image, ctm, colorspace, color, alpha, color_params); + fz_fill_image_mask(ctx, ocr->draw_dev, image, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_ocr_clip_path(fz_context *ctx, fz_device *dev, const fz_path *path, int even_odd, fz_matrix ctm, fz_rect scissor) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_clip_path(ctx, ocr->list_dev, path, even_odd, ctm, scissor); + fz_clip_path(ctx, ocr->draw_dev, path, even_odd, ctm, scissor); +} + +static void +fz_ocr_clip_stroke_path(fz_context *ctx, fz_device *dev, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_clip_stroke_path(ctx, ocr->list_dev, path, stroke, ctm, scissor); + fz_clip_stroke_path(ctx, ocr->draw_dev, path, stroke, ctm, scissor); +} + +static void +fz_ocr_clip_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, fz_rect scissor) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + if (ocr->list_dev != ocr->target) + fz_clip_text(ctx, ocr->list_dev, text, ctm, scissor); + fz_clip_text(ctx, ocr->draw_dev, text, ctm, scissor); +} + +static void +fz_ocr_clip_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + if (ocr->list_dev != ocr->target) + fz_clip_stroke_text(ctx, ocr->list_dev, text, stroke, ctm, scissor); + fz_clip_stroke_text(ctx, ocr->draw_dev, text, stroke, ctm, scissor); +} + +static void +fz_ocr_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + /* Ignore text is generally used when text has been sent as + * part of other graphics - such as line art or images. As such + * we'll pick up the 'true' unicode values of such text in the + * OCR phase. We therefore send text to the list device (so + * it can be rewritten), but not direct to the target. */ + if (ocr->list_dev != ocr->target) + fz_ignore_text(ctx, ocr->list_dev, text, ctm); + fz_ignore_text(ctx, ocr->draw_dev, text, ctm); +} + +static void +fz_ocr_clip_image_mask(fz_context *ctx, fz_device *dev, fz_image *image, fz_matrix ctm, fz_rect scissor) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_clip_image_mask(ctx, ocr->list_dev, image, ctm, scissor); + fz_clip_image_mask(ctx, ocr->draw_dev, image, ctm, scissor); +} + +static void +fz_ocr_pop_clip(fz_context *ctx, fz_device *dev) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_pop_clip(ctx, ocr->list_dev); + fz_pop_clip(ctx, ocr->draw_dev); +} + +static void +fz_ocr_begin_mask(fz_context *ctx, fz_device *dev, fz_rect rect, int luminosity, fz_colorspace *colorspace, const float *color, fz_color_params color_params) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_begin_mask(ctx, ocr->list_dev, rect, luminosity, colorspace, color, color_params); + fz_begin_mask(ctx, ocr->draw_dev, rect, luminosity, colorspace, color, color_params); +} + +static void +fz_ocr_end_mask(fz_context *ctx, fz_device *dev, fz_function *tr) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_end_mask_tr(ctx, ocr->list_dev, tr); + fz_end_mask_tr(ctx, ocr->draw_dev, tr); +} + +static void +fz_ocr_begin_group(fz_context *ctx, fz_device *dev, fz_rect rect, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_begin_group(ctx, ocr->list_dev, rect, cs, isolated, knockout, blendmode, alpha); + fz_begin_group(ctx, ocr->draw_dev, rect, cs, isolated, knockout, blendmode, alpha); +} + +static void +fz_ocr_end_group(fz_context *ctx, fz_device *dev) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_end_group(ctx, ocr->list_dev); + fz_end_group(ctx, ocr->draw_dev); +} + +static int +fz_ocr_begin_tile(fz_context *ctx, fz_device *dev, fz_rect area, fz_rect view, float xstep, float ystep, fz_matrix ctm, int id, int doc_id) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + /* Always pass 0 as tile id here so that neither device can + * disagree about whether the contents need to be sent. */ + (void)fz_begin_tile_tid(ctx, ocr->list_dev, area, view, xstep, ystep, ctm, 0, 0); + (void)fz_begin_tile_tid(ctx, ocr->draw_dev, area, view, xstep, ystep, ctm, 0, 0); + + return 0; +} + +static void +fz_ocr_end_tile(fz_context *ctx, fz_device *dev) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_end_tile(ctx, ocr->list_dev); + fz_end_tile(ctx, ocr->draw_dev); +} + +static void +fz_ocr_render_flags(fz_context *ctx, fz_device *dev, int set, int clear) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_render_flags(ctx, ocr->list_dev, set, clear); + fz_render_flags(ctx, ocr->draw_dev, set, clear); +} + +static void +fz_ocr_set_default_colorspaces(fz_context *ctx, fz_device *dev, fz_default_colorspaces *cs) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_set_default_colorspaces(ctx, ocr->list_dev, cs); + fz_set_default_colorspaces(ctx, ocr->draw_dev, cs); +} + +static void +fz_ocr_begin_layer(fz_context *ctx, fz_device *dev, const char *layer_name) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_begin_layer(ctx, ocr->list_dev, layer_name); + fz_begin_layer(ctx, ocr->draw_dev, layer_name); +} + +static void +fz_ocr_end_layer(fz_context *ctx, fz_device *dev) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + + fz_end_layer(ctx, ocr->list_dev); + fz_end_layer(ctx, ocr->draw_dev); +} + +static void +drop_ocr_device(fz_context *ctx, fz_ocr_device *ocr) +{ + int i; + + if (ocr == NULL) + return; + + if (ocr->list_dev != ocr->target) + fz_drop_device(ctx, ocr->list_dev); + fz_drop_display_list(ctx, ocr->list); + fz_drop_device(ctx, ocr->draw_dev); + fz_drop_pixmap(ctx, ocr->pixmap); + for (i = 0; i < ocr->words_len; i++) + fz_free(ctx, ocr->words[i]); + fz_free(ctx, ocr->words); + fz_free(ctx, ocr->chars); + fz_free(ctx, ocr->language); + fz_free(ctx, ocr->datadir); +} + +static void +flush_word(fz_context *ctx, fz_ocr_device *ocr) +{ + float color = 1; + fz_color_params params = { 0 }; + int i; + fz_text *text = NULL; + fz_matrix trm; + float step; + fz_rect char_bbox; + + if (ocr->char_len == 0) + return; + + /* If we're not sending direct to the target device, then insert + * all the chars we've found into a table so we can rewrite + * the text objects that come from the list device on the fly. + */ + if (ocr->list_dev != ocr->target) + { + word_record *word; + + if (ocr->words_len == ocr->words_max) + { + int new_max = ocr->words_max * 2; + if (new_max == 0) + new_max = 32; + ocr->words = fz_realloc_array(ctx, ocr->words, new_max, word_record *); + ocr->words_max = new_max; + } + word = fz_malloc_flexible(ctx, word_record, unicode, ocr->char_len); + word->len = ocr->char_len; + word->bbox = ocr->word_bbox; + word->n = 0; + memcpy(word->unicode, ocr->chars, ocr->char_len * sizeof(int)); + ocr->words[ocr->words_len++] = word; + ocr->char_len = 0; + return; + } + /* FIXME: Look at font-name. */ + /* All this is a bit horrid, because the detection of sizes for + * the glyphs depends on the width of the glyphs. Use Courier + * because it's monospaced. */ + if (ocr->font == NULL) + ocr->font = fz_new_base14_font(ctx, "Courier"); + + fz_var(text); + + fz_try(ctx) + { + text = fz_new_text(ctx); + + /* Divide the word box into equal lengths. */ + /* This falls down when we have words with chars of + * different widths in, but it's acceptable for these + * purposes. */ + /* FIXME: This assumes L2R motion of text. */ + step = (ocr->word_bbox.x1 - ocr->word_bbox.x0) / ocr->char_len; + char_bbox.x1 = ocr->word_bbox.x0; + char_bbox.y0 = ocr->word_bbox.y0; + char_bbox.y1 = ocr->word_bbox.y1; + for (i = 0; i < ocr->char_len; i++) + { + char_bbox.x0 = char_bbox.x1; + char_bbox.x1 += step; + /* Horrid constants that happen to work with Courier. */ + trm.a = 10.0f/6 * (char_bbox.x1 - char_bbox.x0); + trm.b = 0; + trm.c = 0; + trm.d = 10.0f/6 * (char_bbox.y1 - char_bbox.y0); + trm.e = char_bbox.x0; + trm.f = char_bbox.y0; + fz_show_glyph(ctx, text, ocr->font, trm, + fz_encode_character(ctx, ocr->font, ocr->chars[i]), ocr->chars[i], + 0, 0, FZ_BIDI_LTR, 0); + } + + fz_fill_text(ctx, ocr->target, text, fz_identity, + fz_device_gray(ctx), &color, 1, params); + } + fz_always(ctx) + { + fz_drop_text(ctx, text); + } + fz_catch(ctx) + fz_rethrow(ctx); + + ocr->char_len = 0; +} + +static void +char_callback(fz_context *ctx, void *arg, int unicode, + const char *font_name, + const int *line_bbox, const int *word_bbox, + const int *char_bbox, int pointsize) +{ + fz_ocr_device *ocr = (fz_ocr_device *)arg; + fz_rect bbox = { word_bbox[0]-1, word_bbox[1]-1, word_bbox[2]+1, word_bbox[3]+1 }; + + if (bbox.x0 != ocr->word_bbox.x0 || + bbox.y0 != ocr->word_bbox.y0 || + bbox.x1 != ocr->word_bbox.x1 || + bbox.y1 != ocr->word_bbox.y1) + { + flush_word(ctx, ocr); + ocr->word_bbox = bbox; + } + + if (ocr->char_max == ocr->char_len) + { + int new_max = ocr->char_max * 2; + if (new_max == 0) + new_max = 32; + ocr->chars = fz_realloc_array(ctx, ocr->chars, new_max, int); + ocr->char_max = new_max; + } + + ocr->chars[ocr->char_len++] = unicode; +} + + +typedef struct +{ + fz_device super; + + fz_device *target; + int words_len; + word_record **words; + int current; +} fz_rewrite_device; + +static fz_text_span * +fz_clone_text_span(fz_context *ctx, const fz_text_span *span) +{ + fz_text_span *cspan; + + if (span == NULL) + return NULL; + + cspan = fz_malloc_struct(ctx, fz_text_span); + *cspan = *span; + cspan->cap = cspan->len; + cspan->items = fz_calloc_no_throw(ctx, cspan->len, sizeof(*cspan->items)); + if (cspan->items == NULL) + { + fz_free(ctx, cspan); + errno = ENOMEM; + fz_throw(ctx, FZ_ERROR_SYSTEM, "calloc (%zu x %zu bytes) failed", (size_t)cspan->len, sizeof(*cspan->items)); + } + memcpy(cspan->items, span->items, sizeof(*cspan->items) * cspan->len); + fz_keep_font(ctx, cspan->font); + + return cspan; +} + +#ifdef DEBUG_OCR +static void +debug_word(fz_context *ctx, word_record *word) +{ + int i; + + fz_write_printf(ctx, fz_stdout(ctx), " %g %g %g %g:", + word->bbox.x0, + word->bbox.y0, + word->bbox.x1, + word->bbox.y1); + + for (i = 0; i < word->n; i++) + { + int unicode = word->unicode[i]; + if (unicode >= 32 && unicode < 127) + fz_write_printf(ctx, fz_stdout(ctx), "%c", unicode); + else + fz_write_printf(ctx, fz_stdout(ctx), "<%04x>", unicode); + } + if (word->n < word->len) + { + int unicode = word->unicode[i++]; + if (unicode >= 32 && unicode < 127) + fz_write_printf(ctx, fz_stdout(ctx), "{%c}", unicode); + else + fz_write_printf(ctx, fz_stdout(ctx), "{<%04x>}", unicode); + for (; i < word->len; i++) + { + int unicode = word->unicode[i]; + if (unicode >= 32 && unicode < 127) + fz_write_printf(ctx, fz_stdout(ctx), "%c", unicode); + else + fz_write_printf(ctx, fz_stdout(ctx), "<%04x>", unicode); + } + } + fz_write_printf(ctx, fz_stdout(ctx), "\n"); +} +#endif + +static void +rewrite_char(fz_context *ctx, fz_rewrite_device *dev, fz_matrix ctm, fz_text_item *item, fz_point vadv) +{ + int i, start; + fz_point p = { item->x, item->y }; + + /* No point in trying to rewrite spaces! */ + if (item->ucs == 32) + return; + + p = fz_transform_point(p, ctm); + p.x += vadv.x/2; + p.y += vadv.y/2; + +#ifdef DEBUG_OCR + fz_write_printf(ctx, fz_stdout(ctx), "Looking for '%c' at %g %g\n", item->ucs, p.x, p.y); +#endif + + start = dev->current; + for (i = start; i < dev->words_len; i++) + { +#ifdef DEBUG_OCR + debug_word(ctx, dev->words[i]); +#endif + if (dev->words[i]->n >= dev->words[i]->len) + continue; + if (dev->words[i]->bbox.x0 <= p.x && + dev->words[i]->bbox.x1 >= p.x && + dev->words[i]->bbox.y0 <= p.y && + dev->words[i]->bbox.y1 >= p.y) + { + item->ucs = dev->words[i]->unicode[dev->words[i]->n++]; + dev->current = i; + return; + } + } + for (i = 0; i < start; i++) + { +#ifdef DEBUG_OCR + debug_word(ctx, dev->words[i]); +#endif + if (dev->words[i]->n >= dev->words[i]->len) + continue; + if (dev->words[i]->bbox.x0 <= p.x && + dev->words[i]->bbox.x1 >= p.x && + dev->words[i]->bbox.y0 <= p.y && + dev->words[i]->bbox.y1 >= p.y) + { + item->ucs = dev->words[i]->unicode[dev->words[i]->n++]; + dev->current = i; + return; + } + } +} + +static fz_text_span * +rewrite_span(fz_context *ctx, fz_rewrite_device *dev, fz_matrix ctm, const fz_text_span *span) +{ + fz_text_span *rspan = fz_clone_text_span(ctx, span); + int wmode = span->wmode; + int i; + fz_point dir; + fz_matrix trm = span->trm; + + trm.e = 0; + trm.f = 0; + trm = fz_concat(trm, ctm); + + if (wmode == 0) + { + dir.x = 1; + dir.y = 0; + } + else + { + dir.x = 0; + dir.y = -1; + } + dir = fz_transform_vector(dir, trm); + + /* And do the actual rewriting */ + for (i = 0; i < rspan->len; i++) { + float advance = rspan->items[i].adv; + fz_point vadv = { dir.x * advance, dir.y * advance }; + rewrite_char(ctx, dev, ctm, &rspan->items[i], vadv); + } + + return rspan; +} + +static fz_text * +rewrite_text(fz_context *ctx, fz_rewrite_device *dev, fz_matrix ctm, const fz_text *text) +{ + fz_text *rtext = fz_new_text(ctx); + fz_text_span *span = text->head; + fz_text_span **dspan = &rtext->head; + + fz_try(ctx) + { + while (span) + { + *dspan = rewrite_span(ctx, dev, ctm, span); + rtext->tail = *dspan; + dspan = &(*dspan)->next; + span = span->next; + } + } + fz_catch(ctx) + { + fz_drop_text(ctx, rtext); + fz_rethrow(ctx); + } + + return rtext; +} + +static void +rewrite_fill_path(fz_context *ctx, fz_device *dev, const fz_path *path, int even_odd, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_fill_path(ctx, rewrite->target, path, even_odd, ctm, cs, color, alpha, params); +} + +static void +rewrite_stroke_path(fz_context *ctx, fz_device *dev, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_stroke_path(ctx, rewrite->target, path, stroke, ctm, cs, color, alpha, params); +} + +static void +rewrite_clip_path(fz_context *ctx, fz_device *dev, const fz_path *path, int even_odd, fz_matrix ctm, fz_rect scissor) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_clip_path(ctx, rewrite->target, path, even_odd, ctm, scissor); +} + +static void +rewrite_clip_stroke_path(fz_context *ctx, fz_device *dev, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_clip_stroke_path(ctx, rewrite->target, path, stroke, ctm, scissor); +} + +static void +rewrite_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + fz_text *rtext = rewrite_text(ctx, rewrite, ctm, text); + + fz_try(ctx) + fz_fill_text(ctx, rewrite->target, rtext, ctm, cs, color, alpha, params); + fz_always(ctx) + fz_drop_text(ctx, rtext); + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +rewrite_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + fz_text *rtext = rewrite_text(ctx, rewrite, ctm, text); + + fz_try(ctx) + fz_stroke_text(ctx, rewrite->target, rtext, stroke, ctm, cs, color, alpha, params); + fz_always(ctx) + fz_drop_text(ctx, rtext); + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +rewrite_clip_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, fz_rect scissor) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + fz_text *rtext = rewrite_text(ctx, rewrite, ctm, text); + + fz_try(ctx) + fz_clip_text(ctx, rewrite->target, rtext, ctm, scissor); + fz_always(ctx) + fz_drop_text(ctx, rtext); + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +rewrite_clip_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + fz_text *rtext = rewrite_text(ctx, rewrite, ctm, text); + + fz_try(ctx) + fz_clip_stroke_text(ctx, rewrite->target, rtext, stroke, ctm, scissor); + fz_always(ctx) + fz_drop_text(ctx, rtext); + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +rewrite_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + fz_text *rtext = rewrite_text(ctx, rewrite, ctm, text); + + fz_try(ctx) + fz_ignore_text(ctx, rewrite->target, rtext, ctm); + fz_always(ctx) + fz_drop_text(ctx, rtext); + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +rewrite_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shd, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_fill_shade(ctx, rewrite->target, shd, ctm, alpha, color_params); +} + +static void +rewrite_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_fill_image(ctx, rewrite->target, img, ctm, alpha, color_params); +} + +static void +rewrite_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params color_params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_fill_image_mask(ctx, rewrite->target, img, ctm, cs, color, alpha, color_params); +} + +static void +rewrite_clip_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, fz_rect scissor) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_clip_image_mask(ctx, rewrite->target, img, ctm, scissor); +} + +static void +rewrite_pop_clip(fz_context *ctx, fz_device *dev) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_pop_clip(ctx, rewrite->target); +} + +static void +rewrite_begin_mask(fz_context *ctx, fz_device *dev, fz_rect area, int luminosity, fz_colorspace *cs, const float *bc, fz_color_params params) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_begin_mask(ctx, rewrite->target, area, luminosity, cs, bc, params); +} + +static void +rewrite_end_mask(fz_context *ctx, fz_device *dev, fz_function *tr) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_end_mask_tr(ctx, rewrite->target, tr); +} + +static void +rewrite_begin_group(fz_context *ctx, fz_device *dev, fz_rect area, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_begin_group(ctx, rewrite->target, area, cs, isolated, knockout, blendmode, alpha); +} + +static void +rewrite_end_group(fz_context *ctx, fz_device *dev) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_end_group(ctx, rewrite->target); +} + +static int +rewrite_begin_tile(fz_context *ctx, fz_device *dev, fz_rect area, fz_rect view, float xstep, float ystep, fz_matrix ctm, int id, int doc_id) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + return fz_begin_tile_tid(ctx, rewrite->target, area, view, xstep, ystep, ctm, id, doc_id); +} + +static void +rewrite_end_tile(fz_context *ctx, fz_device *dev) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_end_tile(ctx, rewrite->target); +} + +static void +rewrite_render_flags(fz_context *ctx, fz_device *dev, int set, int clear) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_render_flags(ctx, rewrite->target, set, clear); +} + +static void +rewrite_set_default_colorspaces(fz_context *ctx, fz_device *dev, fz_default_colorspaces *cs) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_set_default_colorspaces(ctx, rewrite->target, cs); +} + +static void +rewrite_begin_layer(fz_context *ctx, fz_device *dev, const char *layer_name) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_begin_layer(ctx, rewrite->target, layer_name); +} + +static void +rewrite_end_layer(fz_context *ctx, fz_device *dev) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + + fz_end_layer(ctx, rewrite->target); +} + +static void +rewrite_close(fz_context *ctx, fz_device *dev) +{ + fz_rewrite_device *rewrite = (fz_rewrite_device *)dev; + fz_font *font; + fz_text *text = NULL; + fz_matrix trm; + int i, j; + + /* All this is a bit horrid, because the detection of sizes for + * the glyphs depends on the width of the glyphs. Use Courier + * because it's monospaced. */ + font = fz_new_base14_font(ctx, "Courier"); + + fz_var(text); + + fz_try(ctx) + { + text = fz_new_text(ctx); + + for (i = 0; i < rewrite->words_len; i++) + { + word_record *word = rewrite->words[i]; + fz_rect char_bbox; + float step; + + if (word->n >= word->len) + continue; + step = (word->bbox.x1 - word->bbox.x0) / word->len; + char_bbox.x1 = word->bbox.x0; + char_bbox.y0 = word->bbox.y0; + char_bbox.y1 = word->bbox.y1; + for (j = 0; j < word->len; j++) + { + char_bbox.x0 = char_bbox.x1; + char_bbox.x1 += step; + /* Horrid constants that happen to work with Courier. */ + trm.a = 10.0f/6 * (char_bbox.x1 - char_bbox.x0); + trm.b = 0; + trm.c = 0; + trm.d = (char_bbox.y1 - char_bbox.y0); + trm.e = char_bbox.x0; + trm.f = char_bbox.y0; + fz_show_glyph(ctx, text, font, trm, + word->unicode[j], word->unicode[j], + 0, 0, FZ_BIDI_LTR, 0); + } + } + + fz_ignore_text(ctx, rewrite->target, text, fz_identity); + } + fz_always(ctx) + { + fz_drop_text(ctx, text); + fz_drop_font(ctx, font); + } + fz_catch(ctx) + fz_rethrow(ctx); +} + +static fz_device * +new_rewrite_device(fz_context *ctx, fz_device *target, word_record **words, int words_len) +{ + fz_rewrite_device *rewrite; + + rewrite = fz_new_derived_device(ctx, fz_rewrite_device); + + rewrite->super.close_device = rewrite_close; + + rewrite->super.fill_path = rewrite_fill_path; + rewrite->super.stroke_path = rewrite_stroke_path; + rewrite->super.clip_path = rewrite_clip_path; + rewrite->super.clip_stroke_path = rewrite_clip_stroke_path; + + rewrite->super.fill_text = rewrite_fill_text; + rewrite->super.stroke_text = rewrite_stroke_text; + rewrite->super.clip_text = rewrite_clip_text; + rewrite->super.clip_stroke_text = rewrite_clip_stroke_text; + rewrite->super.ignore_text = rewrite_ignore_text; + + rewrite->super.fill_shade = rewrite_fill_shade; + rewrite->super.fill_image = rewrite_fill_image; + rewrite->super.fill_image_mask = rewrite_fill_image_mask; + rewrite->super.clip_image_mask = rewrite_clip_image_mask; + + rewrite->super.pop_clip = rewrite_pop_clip; + + rewrite->super.begin_mask = rewrite_begin_mask; + rewrite->super.end_mask = rewrite_end_mask; + rewrite->super.begin_group = rewrite_begin_group; + rewrite->super.end_group = rewrite_end_group; + + rewrite->super.begin_tile = rewrite_begin_tile; + rewrite->super.end_tile = rewrite_end_tile; + + rewrite->super.render_flags = rewrite_render_flags; + rewrite->super.set_default_colorspaces = rewrite_set_default_colorspaces; + + rewrite->super.begin_layer = rewrite_begin_layer; + rewrite->super.end_layer = rewrite_end_layer; + + rewrite->target = target; + rewrite->words = words; + rewrite->words_len = words_len; + rewrite->current = 0; + + return &rewrite->super; +} + +static int +fz_ocr_progress(fz_context *ctx, void *arg, int prog) +{ + fz_ocr_device *ocr = (fz_ocr_device *)arg; + + if (ocr->progress == NULL) + return 0; + + return ocr->progress(ctx, ocr->progress_arg, prog); +} + +static void +fz_ocr_close_device(fz_context *ctx, fz_device *dev) +{ + fz_ocr_device *ocr = (fz_ocr_device *)dev; + void *tessapi; + fz_device *rewrite_device; + fz_rect bbox; + + fz_close_device(ctx, ocr->draw_dev); + + /* Now run the OCR */ + tessapi = ocr_init(ctx, ocr->language, ocr->datadir); + + fz_try(ctx) + { + ocr_recognise(ctx, tessapi, ocr->pixmap, char_callback, &fz_ocr_progress, ocr); + flush_word(ctx, ocr); + } + fz_always(ctx) + ocr_fin(ctx, tessapi); + fz_catch(ctx) + fz_rethrow(ctx); + + /* If we're not using a list, we're done! */ + if (ocr->list_dev == ocr->target) + return; + + fz_close_device(ctx, ocr->list_dev); + + bbox = fz_transform_rect(ocr->mediabox, ocr->ctm); + rewrite_device = new_rewrite_device(ctx, ocr->target, ocr->words, ocr->words_len); + fz_try(ctx) + { + fz_run_display_list(ctx, ocr->list, rewrite_device, + fz_identity, bbox, NULL); + } + fz_always(ctx) + { + fz_close_device(ctx, rewrite_device); + fz_drop_device(ctx, rewrite_device); + } + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +fz_ocr_drop_device(fz_context *ctx, fz_device *dev) +{ + drop_ocr_device(ctx, (fz_ocr_device *)dev); +} +#endif + +fz_device * +fz_new_ocr_device(fz_context *ctx, + fz_device *target, + fz_matrix ctm, + fz_rect mediabox, + int with_list, + const char *language, + const char *datadir, + int (*progress)(fz_context *, void *, int), + void *progress_arg) +{ +#ifdef OCR_DISABLED + fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "OCR Disabled in this build"); +#else + fz_ocr_device *dev; + + if (target == NULL) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "OCR devices require a target"); + + dev = fz_new_derived_device(ctx, fz_ocr_device); + + dev->super.close_device = fz_ocr_close_device; + dev->super.drop_device = fz_ocr_drop_device; + + dev->super.fill_path = fz_ocr_fill_path; + dev->super.stroke_path = fz_ocr_stroke_path; + dev->super.clip_path = fz_ocr_clip_path; + dev->super.clip_stroke_path = fz_ocr_clip_stroke_path; + + dev->super.fill_text = fz_ocr_fill_text; + dev->super.stroke_text = fz_ocr_stroke_text; + dev->super.clip_text = fz_ocr_clip_text; + dev->super.clip_stroke_text = fz_ocr_clip_stroke_text; + dev->super.ignore_text = fz_ocr_ignore_text; + + dev->super.fill_shade = fz_ocr_fill_shade; + dev->super.fill_image = fz_ocr_fill_image; + dev->super.fill_image_mask = fz_ocr_fill_image_mask; + dev->super.clip_image_mask = fz_ocr_clip_image_mask; + + dev->super.pop_clip = fz_ocr_pop_clip; + + dev->super.begin_mask = fz_ocr_begin_mask; + dev->super.end_mask = fz_ocr_end_mask; + dev->super.begin_group = fz_ocr_begin_group; + dev->super.end_group = fz_ocr_end_group; + + dev->super.begin_tile = fz_ocr_begin_tile; + dev->super.end_tile = fz_ocr_end_tile; + + dev->super.render_flags = fz_ocr_render_flags; + dev->super.set_default_colorspaces = fz_ocr_set_default_colorspaces; + dev->super.begin_layer = fz_ocr_begin_layer; + dev->super.end_layer = fz_ocr_end_layer; + + dev->progress = progress; + dev->progress_arg = progress_arg; + + fz_try(ctx) + { + fz_rect bbox; + fz_irect ibox; + fz_point res; + + dev->target = target; + dev->mediabox = mediabox; + dev->ctm = ctm; + + bbox = fz_transform_rect(mediabox, ctm); + ibox = fz_round_rect(bbox); + /* Fudge the width to be a multiple of 4. */ + ibox.x1 += (4-(ibox.x1-ibox.x0)) & 3; + dev->pixmap = fz_new_pixmap_with_bbox(ctx, fz_device_gray(ctx), + ibox, NULL, 0); + fz_clear_pixmap(ctx, dev->pixmap); + res = fz_transform_point_xy(72, 72, ctm); + if (res.x < 0) + res.x = -res.x; + if (res.x < 1) + res.x = 1; + if (res.y < 0) + res.y = -res.y; + if (res.y < 1) + res.y = 1; + fz_set_pixmap_resolution(ctx, dev->pixmap, res.x, res.y); + + dev->language = fz_strdup(ctx, language ? language : "eng"); + dev->datadir = fz_strdup(ctx, datadir ? datadir : ""); + + dev->draw_dev = fz_new_draw_device(ctx, fz_identity, dev->pixmap); + if (with_list) + { + dev->list = fz_new_display_list(ctx, mediabox); + dev->list_dev = fz_new_list_device(ctx, dev->list); + } else + dev->list_dev = dev->target; + } + fz_catch(ctx) + { + drop_ocr_device(ctx, dev); + fz_rethrow(ctx); + } + + return (fz_device*)dev; +#endif +}
