Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/normalis.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: normalis.h (Formerly denorm.h) | |
| 3 * Description: Code for the DENORM class. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1992, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #ifndef NORMALIS_H | |
| 20 #define NORMALIS_H | |
| 21 | |
| 22 #include "image.h" | |
| 23 | |
| 24 #include <tesseract/export.h> | |
| 25 | |
| 26 #include <vector> | |
| 27 | |
| 28 struct Pix; | |
| 29 | |
| 30 namespace tesseract { | |
| 31 | |
| 32 const int kBlnCellHeight = 256; // Full-height for baseline normalization. | |
| 33 const int kBlnXHeight = 128; // x-height for baseline normalization. | |
| 34 const int kBlnBaselineOffset = 64; // offset for baseline normalization. | |
| 35 | |
| 36 class BLOCK; | |
| 37 class FCOORD; | |
| 38 class TBOX; | |
| 39 class UNICHARSET; | |
| 40 | |
| 41 struct TBLOB; | |
| 42 struct TPOINT; | |
| 43 | |
| 44 // Possible normalization methods. Use NEGATIVE values as these also | |
| 45 // double up as markers for the last sub-classifier. | |
| 46 enum NormalizationMode { | |
| 47 NM_BASELINE = -3, // The original BL normalization mode. | |
| 48 NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic. | |
| 49 NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. | |
| 50 }; | |
| 51 | |
| 52 class TESS_API DENORM { | |
| 53 public: | |
| 54 DENORM(); | |
| 55 | |
| 56 // Copying a DENORM is allowed. | |
| 57 DENORM(const DENORM &); | |
| 58 DENORM &operator=(const DENORM &); | |
| 59 ~DENORM(); | |
| 60 | |
| 61 // Setup the normalization transformation parameters. | |
| 62 // The normalizations applied to a blob are as follows: | |
| 63 // 1. An optional block layout rotation that was applied during layout | |
| 64 // analysis to make the textlines horizontal. | |
| 65 // 2. A normalization transformation (LocalNormTransform): | |
| 66 // Subtract the "origin" | |
| 67 // Apply an x,y scaling. | |
| 68 // Apply an optional rotation. | |
| 69 // Add back a final translation. | |
| 70 // The origin is in the block-rotated space, and is usually something like | |
| 71 // the x-middle of the word at the baseline. | |
| 72 // 3. Zero or more further normalization transformations that are applied | |
| 73 // in sequence, with a similar pattern to the first normalization transform. | |
| 74 // | |
| 75 // A DENORM holds the parameters of a single normalization, and can execute | |
| 76 // both the LocalNormTransform (a forwards normalization), and the | |
| 77 // LocalDenormTransform which is an inverse transform or de-normalization. | |
| 78 // A DENORM may point to a predecessor DENORM, which is actually the earlier | |
| 79 // normalization, so the full normalization sequence involves executing all | |
| 80 // predecessors first and then the transform in "this". | |
| 81 // Let x be image coordinates, and suppose we have normalization classes A, B, C | |
| 82 // where we first apply A then B then C to get normalized x': | |
| 83 // x' = CBAx | |
| 84 // Then the backwards (to original coordinates) would be: | |
| 85 // x = A^-1 B^-1 C^-1 x' | |
| 86 // and A = B->predecessor_ and B = C->predecessor_ | |
| 87 // NormTransform executes all predecessors recursively, and then this. | |
| 88 // NormTransform would be used to transform an image-based feature to | |
| 89 // normalized space for use in a classifier | |
| 90 // DenormTransform inverts this and then all predecessors. It can be | |
| 91 // used to get back to the original image coordinates from normalized space. | |
| 92 // The LocalNormTransform member executes just the transformation | |
| 93 // in "this" without the layout rotation or any predecessors. It would be | |
| 94 // used to run each successive normalization, eg the word normalization, | |
| 95 // and later the character normalization. | |
| 96 | |
| 97 // Arguments: | |
| 98 // block: if not nullptr, then this is the first transformation, and | |
| 99 // block->re_rotation() needs to be used after the Denorm | |
| 100 // transformation to get back to the image coords. | |
| 101 // rotation: if not nullptr, apply this rotation after translation to the | |
| 102 // origin and scaling. (Usually a classify rotation.) | |
| 103 // predecessor: if not nullptr, then predecessor has been applied to the | |
| 104 // input space and needs to be undone to complete the inverse. | |
| 105 // The above pointers are not owned by this DENORM and are assumed to live | |
| 106 // longer than this denorm, except rotation, which is deep copied on input. | |
| 107 // | |
| 108 // x_origin: The x origin which will be mapped to final_xshift in the result. | |
| 109 // y_origin: The y origin which will be mapped to final_yshift in the result. | |
| 110 // Added to result of row->baseline(x) if not nullptr. (NOTE: appears stale - SetupNormalization as declared below takes no row argument.) | |
| 111 // | |
| 112 // x_scale: scale factor for the x-coordinate. | |
| 113 // y_scale: scale factor for the y-coordinate. Ignored if segs is given. (NOTE: appears stale - SetupNormalization as declared below takes no segs argument.) | |
| 114 // Note that these scale factors apply to the same x and y system as the | |
| 115 // x-origin and y-origin apply, ie after any block rotation, but before | |
| 116 // the rotation argument is applied. | |
| 117 // | |
| 118 // final_xshift: The x component of the final translation. | |
| 119 // final_yshift: The y component of the final translation. | |
| 120 // | |
| 121 // In theory, any of the commonly used normalizations can be setup here: | |
| 122 // * Traditional baseline normalization on a word: | |
| 123 // SetupNormalization(block, nullptr, nullptr, | |
| 124 // box.x_middle(), baseline, | |
| 125 // kBlnXHeight / x_height, kBlnXHeight / x_height, | |
| 126 // 0, kBlnBaselineOffset); | |
| 127 // * "Numeric mode" baseline normalization on a word, in which the blobs | |
| 128 // are positioned with the bottom as the baseline, is achieved by making | |
| 129 // a separate DENORM for each blob. | |
| 130 // SetupNormalization(block, nullptr, nullptr, | |
| 131 // box.x_middle(), box.bottom(), | |
| 132 // kBlnXHeight / x_height, kBlnXHeight / x_height, | |
| 133 // 0, kBlnBaselineOffset); | |
| 134 // * Anisotropic character normalization used by IntFx. | |
| 135 // SetupNormalization(nullptr, nullptr, denorm, | |
| 136 // centroid_x, centroid_y, | |
| 137 // 51.2 / ry, 51.2 / rx, 128, 128); | |
| 138 // * Normalize blob height to x-height (current OSD): | |
| 139 // SetupNormalization(nullptr, &rotation, nullptr, | |
| 140 // box.rotational_x_middle(rotation), | |
| 141 // box.rotational_y_middle(rotation), | |
| 142 // kBlnXHeight / box.rotational_height(rotation), | |
| 143 // kBlnXHeight / box.rotational_height(rotation), | |
| 144 // 0, kBlnBaselineOffset); | |
| 145 // * Secondary normalization for classification rotation (current): | |
| 146 // FCOORD rotation = block->classify_rotation(); | |
| 147 // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; | |
| 148 // SetupNormalization(nullptr, &rotation, denorm, | |
| 149 // box.rotational_x_middle(rotation), | |
| 150 // box.rotational_y_middle(rotation), | |
| 151 // target_height / box.rotational_height(rotation), | |
| 152 // target_height / box.rotational_height(rotation), | |
| 153 // 0, kBlnBaselineOffset); | |
| 154 // * Proposed new normalizations for CJK: Between them there is then | |
| 155 // no need for further normalization at all, and the character fills the cell. | |
| 156 // ** Replacement for baseline normalization on a word: | |
| 157 // Scales height and width independently so that modal height and pitch | |
| 158 // fill the cell respectively. | |
| 159 // float cap_height = x_height / CCStruct::kXHeightCapRatio; | |
| 160 // SetupNormalization(block, nullptr, nullptr, | |
| 161 // box.x_middle(), cap_height / 2.0f, | |
| 162 // kBlnCellHeight / fixed_pitch, | |
| 163 // kBlnCellHeight / cap_height, | |
| 164 // 0, 0); | |
| 165 // ** Secondary normalization for classification (with rotation) (proposed): | |
| 166 // Requires a simple translation to the center of the appropriate character | |
| 167 // cell, no further scaling and a simple rotation (or nothing) about the | |
| 168 // cell center. | |
| 169 // FCOORD rotation = block->classify_rotation(); | |
| 170 // SetupNormalization(nullptr, &rotation, denorm, | |
| 171 // fixed_pitch_cell_center, | |
| 172 // 0.0f, | |
| 173 // 1.0f, | |
| 174 // 1.0f, | |
| 175 // 0, 0); | |
| 176 void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, | |
| 177 float x_origin, float y_origin, float x_scale, float y_scale, | |
| 178 float final_xshift, float final_yshift); | |
| 179 | |
| 180 // Sets up the DENORM to execute a non-linear transformation based on | |
| 181 // preserving an even distribution of stroke edges. The transformation | |
| 182 // operates only within the given box, scaling input coords within the box | |
| 183 // non-linearly to a box of target_width by target_height, with all other | |
| 184 // coords being clipped to the box edge. As with SetupNormalization above, | |
| 185 // final_xshift and final_yshift are applied after scaling, and the bottom- | |
| 186 // left of box is used as a pre-scaling origin. | |
| 187 // x_coords is a collection of the x-coords of vertical edges for each | |
| 188 // y-coord starting at box.bottom(). | |
| 189 // y_coords is a collection of the y-coords of horizontal edges for each | |
| 190 // x-coord starting at box.left(). | |
| 191 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. | |
| 192 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. | |
| 193 // The second-level vectors must all be sorted in ascending order. | |
| 194 void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, | |
| 195 float target_height, float final_xshift, float final_yshift, | |
| 196 const std::vector<std::vector<int>> &x_coords, | |
| 197 const std::vector<std::vector<int>> &y_coords); | |
| 198 | |
| 199 // Transforms the given coords one step forward to normalized space, without | |
| 200 // using any block rotation or predecessor. | |
| 201 void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const; | |
| 202 void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const; | |
| 203 // Transforms the given coords forward to normalized space using the | |
| 204 // full transformation sequence defined by the block rotation, the | |
| 205 // predecessors, deepest first, and finally this. If first_norm is not | |
| 206 // nullptr, then the first and deepest transformation used is first_norm, | |
| 207 // ending with this, and the block rotation will not be applied. | |
| 208 void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const; | |
| 209 void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const; | |
| 210 // Transforms the given coords one step back to source space, without | |
| 211 // using any block rotation or predecessor. | |
| 212 void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const; | |
| 213 void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const; | |
| 214 // Transforms the given coords all the way back to source image space using | |
| 215 // the full transformation sequence defined by this and its predecessors | |
| 216 // recursively, shallowest first, and finally any block re_rotation. | |
| 217 // If last_denorm is not nullptr, then the last transformation used will | |
| 218 // be last_denorm, and the block re_rotation will never be executed. | |
| 219 void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const; | |
| 220 void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const; | |
| 221 | |
| 222 // Normalize a blob using blob transformations. Less accurate, but | |
| 223 // more faithfully reproduces the old behavior. | |
| 224 void LocalNormBlob(TBLOB *blob) const; | |
| 225 | |
| 226 // Fills in the x-height range accepted by the given unichar_id in blob | |
| 227 // coordinates, given its bounding box in the usual baseline-normalized | |
| 228 // coordinates, with some initial crude x-height estimate (such as word | |
| 229 // size) and this denoting the transformation that was used. | |
| 230 // Also returns the amount the character must have shifted up or down. | |
| 231 void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, | |
| 232 float *max_xht, float *yshift) const; | |
| 233 | |
| 234 // Prints the content of the DENORM for debug purposes. | |
| 235 void Print() const; | |
| 236 | |
| 237 Image pix() const { | |
| 238 return pix_; | |
| 239 } | |
| 240 void set_pix(Image pix) { | |
| 241 pix_ = pix; | |
| 242 } | |
| 243 bool inverse() const { | |
| 244 return inverse_; | |
| 245 } | |
| 246 void set_inverse(bool value) { | |
| 247 inverse_ = value; | |
| 248 } | |
| 249 const DENORM *RootDenorm() const { | |
| 250 if (predecessor_ != nullptr) { | |
| 251 return predecessor_->RootDenorm(); | |
| 252 } | |
| 253 return this; | |
| 254 } | |
| 255 const DENORM *predecessor() const { | |
| 256 return predecessor_; | |
| 257 } | |
| 258 // Accessors - perhaps should not be needed. | |
| 259 float x_scale() const { | |
| 260 return x_scale_; | |
| 261 } | |
| 262 float y_scale() const { | |
| 263 return y_scale_; | |
| 264 } | |
| 265 const BLOCK *block() const { | |
| 266 return block_; | |
| 267 } | |
| 268 void set_block(const BLOCK *block) { | |
| 269 block_ = block; | |
| 270 } | |
| 271 | |
| 272 private: | |
| 273 // Free allocated memory and clear pointers. | |
| 274 void Clear(); | |
| 275 // Setup default values. | |
| 276 void Init(); | |
| 277 | |
| 278 // Best available image. | |
| 279 Image pix_; | |
| 280 // True if the source image is white-on-black. | |
| 281 bool inverse_; | |
| 282 // Block the word came from. If not null, block->re_rotation() takes the | |
| 283 // "untransformed" coordinates even further back to the original image. | |
| 284 // Used only on the first DENORM in a chain. | |
| 285 const BLOCK *block_; | |
| 286 // Rotation to apply after translation to the origin and scaling, and before the final translation. | |
| 287 const FCOORD *rotation_; | |
| 288 // Previous transformation in a chain. | |
| 289 const DENORM *predecessor_; | |
| 290 // Non-linear transformation maps directly from each integer offset from the | |
| 291 // origin to the corresponding x-coord. Owned by the DENORM. | |
| 292 std::vector<float> *x_map_; | |
| 293 // Non-linear transformation maps directly from each integer offset from the | |
| 294 // origin to the corresponding y-coord. Owned by the DENORM. | |
| 295 std::vector<float> *y_map_; | |
| 296 // x-coordinate to be mapped to final_xshift_ in the result. | |
| 297 float x_origin_; | |
| 298 // y-coordinate to be mapped to final_yshift_ in the result. | |
| 299 float y_origin_; | |
| 300 // Scale factors for x and y coords. Applied to pre-rotation system. | |
| 301 float x_scale_; | |
| 302 float y_scale_; | |
| 303 // Destination coords of the x_origin_ and y_origin_. | |
| 304 float final_xshift_; | |
| 305 float final_yshift_; | |
| 306 }; | |
| 307 | |
| 308 } // namespace tesseract | |
| 309 | |
| 310 #endif |
