Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccstruct/normalis.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
// Provenance (original diff hunk header):
//   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
//   +++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/normalis.h Mon Sep 15 11:43:07 2025 +0200
//   @@ -0,0 +1,310 @@
/**********************************************************************
 * File: normalis.h (Formerly denorm.h)
 * Description: Code for the DENORM class.
 * Author: Ray Smith
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef NORMALIS_H
#define NORMALIS_H

#include "image.h"

#include <tesseract/export.h>

#include <vector>

struct Pix;

namespace tesseract {

const int kBlnCellHeight = 256;    // Full-height for baseline normalization.
const int kBlnXHeight = 128;       // x-height for baseline normalization.
const int kBlnBaselineOffset = 64; // offset for baseline normalization.

class BLOCK;
class FCOORD;
class TBOX;
class UNICHARSET;

struct TBLOB;
struct TPOINT;

// Possible normalization methods. Use NEGATIVE values as these also
// double up as markers for the last sub-classifier.
enum NormalizationMode {
  NM_BASELINE = -3,        // The original BL normalization mode.
  NM_CHAR_ISOTROPIC = -2,  // Character normalization but isotropic.
  NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
};

// A DENORM holds the parameters of one normalization step and can run that
// step forwards (normalize) or backwards (de-normalize). Steps are chained
// through a non-owned predecessor pointer; see SetupNormalization below for
// the full description of the transformation sequence.
class TESS_API DENORM {
public:
  DENORM();

  // Copying a DENORM is allowed.
  DENORM(const DENORM &);
  DENORM &operator=(const DENORM &);
  ~DENORM();

  // Setup the normalization transformation parameters.
  // The normalizations applied to a blob are as follows:
  // 1. An optional block layout rotation that was applied during layout
  //    analysis to make the textlines horizontal.
  // 2. A normalization transformation (LocalNormTransform):
  //      Subtract the "origin"
  //      Apply an x,y scaling.
  //      Apply an optional rotation.
  //      Add back a final translation.
  //    The origin is in the block-rotated space, and is usually something like
  //    the x-middle of the word at the baseline.
  // 3. Zero or more further normalization transformations that are applied
  //    in sequence, with a similar pattern to the first normalization transform.
  //
  // A DENORM holds the parameters of a single normalization, and can execute
  // both the LocalNormTransform (a forwards normalization), and the
  // LocalDenormTransform which is an inverse transform or de-normalization.
  // A DENORM may point to a predecessor DENORM, which is actually the earlier
  // normalization, so the full normalization sequence involves executing all
  // predecessors first and then the transform in "this".
  // Let x be image coordinates, and assume that we have normalization classes
  // A, B, C where we first apply A then B then C to get normalized x':
  //   x' = CBAx
  // Then the backwards (to original coordinates) would be:
  //   x = A^-1 B^-1 C^-1 x'
  // and A = B->predecessor_ and B = C->predecessor_
  // NormTransform executes all predecessors recursively, and then this.
  // NormTransform would be used to transform an image-based feature to
  // normalized space for use in a classifier.
  // DenormTransform inverts this and then all predecessors. It can be
  // used to get back to the original image coordinates from normalized space.
  // The LocalNormTransform member executes just the transformation
  // in "this" without the layout rotation or any predecessors. It would be
  // used to run each successive normalization, eg the word normalization,
  // and later the character normalization.

  // Arguments:
  // block: if not nullptr, then this is the first transformation, and
  //        block->re_rotation() needs to be used after the Denorm
  //        transformation to get back to the image coords.
  // rotation: if not nullptr, apply this rotation after translation to the
  //           origin and scaling. (Usually a classify rotation.)
  // predecessor: if not nullptr, then predecessor has been applied to the
  //              input space and needs to be undone to complete the inverse.
  // The above pointers are not owned by this DENORM and are assumed to live
  // longer than this denorm, except rotation, which is deep copied on input.
  //
  // x_origin: The x origin which will be mapped to final_xshift in the result.
  // y_origin: The y origin which will be mapped to final_yshift in the result.
  //           Added to result of row->baseline(x) if not nullptr.
  //           NOTE(review): the declaration below has no `row` parameter, so
  //           the row->baseline remark looks stale - confirm.
  //
  // x_scale: scale factor for the x-coordinate.
  // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
  //          NOTE(review): the declaration below has no `segs` parameter, so
  //          the "Ignored if segs is given" remark looks stale - confirm.
  // Note that these scale factors apply to the same x and y system as the
  // x-origin and y-origin apply, ie after any block rotation, but before
  // the rotation argument is applied.
  //
  // final_xshift: The x component of the final translation.
  // final_yshift: The y component of the final translation.
  //
  // In theory, any of the commonly used normalizations can be setup here:
  // * Traditional baseline normalization on a word:
  //   SetupNormalization(block, nullptr, nullptr,
  //                      box.x_middle(), baseline,
  //                      kBlnXHeight / x_height, kBlnXHeight / x_height,
  //                      0, kBlnBaselineOffset);
  // * "Numeric mode" baseline normalization on a word, in which the blobs
  //   are positioned with the bottom as the baseline is achieved by making
  //   a separate DENORM for each blob.
  //   SetupNormalization(block, nullptr, nullptr,
  //                      box.x_middle(), box.bottom(),
  //                      kBlnXHeight / x_height, kBlnXHeight / x_height,
  //                      0, kBlnBaselineOffset);
  // * Anisotropic character normalization used by IntFx.
  //   SetupNormalization(nullptr, nullptr, denorm,
  //                      centroid_x, centroid_y,
  //                      51.2 / ry, 51.2 / rx, 128, 128);
  //   NOTE(review): x_scale precedes y_scale in the signature, yet ry is
  //   listed first in this example - confirm the rx/ry order with the caller.
  // * Normalize blob height to x-height (current OSD):
  //   SetupNormalization(nullptr, &rotation, nullptr,
  //                      box.rotational_x_middle(rotation),
  //                      box.rotational_y_middle(rotation),
  //                      kBlnXHeight / box.rotational_height(rotation),
  //                      kBlnXHeight / box.rotational_height(rotation),
  //                      0, kBlnBaselineOffset);
  // * Secondary normalization for classification rotation (current):
  //   FCOORD rotation = block->classify_rotation();
  //   float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
  //   SetupNormalization(nullptr, &rotation, denorm,
  //                      box.rotational_x_middle(rotation),
  //                      box.rotational_y_middle(rotation),
  //                      target_height / box.rotational_height(rotation),
  //                      target_height / box.rotational_height(rotation),
  //                      0, kBlnBaselineOffset);
  // * Proposed new normalizations for CJK: Between them there is then
  //   no need for further normalization at all, and the character fills the cell.
  // ** Replacement for baseline normalization on a word:
  //    Scales height and width independently so that modal height and pitch
  //    fill the cell respectively.
  //    float cap_height = x_height / CCStruct::kXHeightCapRatio;
  //    SetupNormalization(block, nullptr, nullptr,
  //                       box.x_middle(), cap_height / 2.0f,
  //                       kBlnCellHeight / fixed_pitch,
  //                       kBlnCellHeight / cap_height,
  //                       0, 0);
  // ** Secondary normalization for classification (with rotation) (proposed):
  //    Requires a simple translation to the center of the appropriate character
  //    cell, no further scaling and a simple rotation (or nothing) about the
  //    cell center.
  //    FCOORD rotation = block->classify_rotation();
  //    SetupNormalization(nullptr, &rotation, denorm,
  //                       fixed_pitch_cell_center,
  //                       0.0f,
  //                       1.0f,
  //                       1.0f,
  //                       0, 0);
  void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
                          float x_origin, float y_origin, float x_scale, float y_scale,
                          float final_xshift, float final_yshift);

  // Sets up the DENORM to execute a non-linear transformation based on
  // preserving an even distribution of stroke edges. The transformation
  // operates only within the given box, scaling input coords within the box
  // non-linearly to a box of target_width by target_height, with all other
  // coords being clipped to the box edge. As with SetupNormalization above,
  // final_xshift and final_yshift are applied after scaling, and the bottom-
  // left of box is used as a pre-scaling origin.
  // x_coords is a collection of the x-coords of vertical edges for each
  // y-coord starting at box.bottom().
  // y_coords is a collection of the y-coords of horizontal edges for each
  // x-coord starting at box.left().
  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
  // The second-level vectors must all be sorted in ascending order.
  void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
                      float target_height, float final_xshift, float final_yshift,
                      const std::vector<std::vector<int>> &x_coords,
                      const std::vector<std::vector<int>> &y_coords);

  // Transforms the given coords one step forward to normalized space, without
  // using any block rotation or predecessor.
  void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const;
  void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const;
  // Transforms the given coords forward to normalized space using the
  // full transformation sequence defined by the block rotation, the
  // predecessors, deepest first, and finally this. If first_norm is not
  // nullptr, then the first and deepest transformation used is first_norm,
  // ending with this, and the block rotation will not be applied.
  void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const;
  void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const;
  // Transforms the given coords one step back to source space, without
  // using any block rotation or predecessor.
  void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const;
  void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const;
  // Transforms the given coords all the way back to source image space using
  // the full transformation sequence defined by this and its predecessors
  // recursively, shallowest first, and finally any block re_rotation.
  // If last_denorm is not nullptr, then the last transformation used will
  // be last_denorm, and the block re_rotation will never be executed.
  void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const;
  void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const;

  // Normalize a blob using blob transformations. Less accurate, but
  // more accurately copies the old way.
  void LocalNormBlob(TBLOB *blob) const;

  // Fills in the x-height range accepted by the given unichar_id in blob
  // coordinates, given its bounding box in the usual baseline-normalized
  // coordinates, with some initial crude x-height estimate (such as word
  // size) and this denoting the transformation that was used.
  // Also returns the amount the character must have shifted up or down.
  // Results are written through the min_xht, max_xht and yshift pointers.
  void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht,
                    float *max_xht, float *yshift) const;

  // Prints the content of the DENORM for debug purposes.
  void Print() const;

  // Returns the stored image (pix_).
  Image pix() const {
    return pix_;
  }
  // Replaces the stored image (pix_).
  void set_pix(Image pix) {
    pix_ = pix;
  }
  // Returns the inverse_ flag (true if the source image is white-on-black).
  bool inverse() const {
    return inverse_;
  }
  // Sets the inverse_ flag.
  void set_inverse(bool value) {
    inverse_ = value;
  }
  // Follows the predecessor_ links recursively and returns the DENORM at the
  // start of the chain, ie the one with no predecessor. Returns this if
  // predecessor_ is nullptr.
  const DENORM *RootDenorm() const {
    if (predecessor_ != nullptr) {
      return predecessor_->RootDenorm();
    }
    return this;
  }
  // Returns the previous transformation in the chain, or nullptr if none.
  const DENORM *predecessor() const {
    return predecessor_;
  }
  // Accessors - perhaps should not be needed.
  float x_scale() const {
    return x_scale_;
  }
  float y_scale() const {
    return y_scale_;
  }
  // Returns the (non-owned) block pointer; may be nullptr.
  const BLOCK *block() const {
    return block_;
  }
  // Sets the block pointer. Only the pointer is stored.
  void set_block(const BLOCK *block) {
    block_ = block;
  }

private:
  // Free allocated memory and clear pointers.
  void Clear();
  // Setup default values.
  void Init();

  // Best available image.
  Image pix_;
  // True if the source image is white-on-black.
  bool inverse_;
  // Block the word came from. If not null, block->re_rotation() takes the
  // "untransformed" coordinates even further back to the original image.
  // Used only on the first DENORM in a chain.
  const BLOCK *block_;
  // Optional rotation. Per the SetupNormalization notes above it is applied
  // after the translation to the origin and the scaling, and it is a deep
  // copy of the rotation argument rather than the caller's pointer.
  const FCOORD *rotation_;
  // Previous transformation in a chain.
  const DENORM *predecessor_;
  // Non-linear transformation maps directly from each integer offset from the
  // origin to the corresponding x-coord. Owned by the DENORM.
  std::vector<float> *x_map_;
  // Non-linear transformation maps directly from each integer offset from the
  // origin to the corresponding y-coord. Owned by the DENORM.
  std::vector<float> *y_map_;
  // x-coordinate to be mapped to final_xshift_ in the result.
  float x_origin_;
  // y-coordinate to be mapped to final_yshift_ in the result.
  float y_origin_;
  // Scale factors for x and y coords. Applied to pre-rotation system.
  float x_scale_;
  float y_scale_;
  // Destination coords of the x_origin_ and y_origin_.
  float final_xshift_;
  float final_yshift_;
};

} // namespace tesseract

#endif
