Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/wordrec/associate.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: associate.h | |
| 3 // Description: Structs, classes, typedefs useful for the segmentation | |
| 4 // search. Functions for scoring segmentation paths according | |
| 5 // to their character widths, gap widths and seam cuts. | |
| 6 // Author: Daria Antonova | |
| 7 // Created: Mon Mar 8 11:26:43 PDT 2010 | |
| 8 // | |
| 9 // (C) Copyright 2010, Google Inc. | |
| 10 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 11 // you may not use this file except in compliance with the License. | |
| 12 // You may obtain a copy of the License at | |
| 13 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 14 // Unless required by applicable law or agreed to in writing, software | |
| 15 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 17 // See the License for the specific language governing permissions and | |
| 18 // limitations under the License. | |
| 19 // | |
| 20 /////////////////////////////////////////////////////////////////////// | |
| 21 | |
| 22 #ifndef ASSOCIATE_H | |
| 23 #define ASSOCIATE_H | |
| 24 | |
| 25 #include "blobs.h" | |
| 26 #include "elst.h" | |
| 27 #include "ratngs.h" | |
| 28 #include "seam.h" | |
| 29 #include "split.h" | |
| 30 | |
| 31 namespace tesseract { | |
| 32 | |
| 33 class WERD_RES; | |
| 34 | |
| 35 // Statistics about character widths, gaps and seams. The constructor calls Clear(), which resets every field to zero/false; Print() reports only shape_cost and bad_shape. | |
| 36 struct AssociateStats { | |
| 37 AssociateStats() { | |
| 38 Clear(); | |
| 39 } | |
| 40 | |
| 41 void Clear() { | |
| 42 shape_cost = 0.0f; | |
| 43 bad_shape = false; | |
| 44 full_wh_ratio = 0.0f; | |
| 45 full_wh_ratio_total = 0.0f; | |
| 46 full_wh_ratio_var = 0.0f; | |
| 47 bad_fixed_pitch_right_gap = false; | |
| 48 bad_fixed_pitch_wh_ratio = false; | |
| 49 gap_sum = 0; | |
| 50 } | |
| 51 | |
| 52 void Print() { | |
| 53 tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape); | |
| 54 } | |
| 55 | |
| 56 float shape_cost; // cost of the blob shape | |
| 57 bool bad_shape; // true if the shape of the blob is unacceptable | |
| 58 float full_wh_ratio; // width-to-height ratio + gap on the right | |
| 59 float full_wh_ratio_total; // sum of the width-to-height ratios | |
| 60 // on the path terminating at this blob | |
| 61 float full_wh_ratio_var; // variance of the full_wh_ratio values on the path | |
| 62 bool bad_fixed_pitch_right_gap; // true if there is no gap before | |
| 63 // the blob on the right | |
| 64 bool bad_fixed_pitch_wh_ratio; // true if the blob has a width-to-height | |
| 65 // ratio > AssociateUtils::kMaxFixedPitchCharAspectRatio | |
| 66 int gap_sum; // sum of the gaps within the blob | |
| 67 }; | |
| 68 | |
| 69 // Utility functions for scoring segmentation paths according to their | |
| 70 // character widths, gap widths and seam characteristics. All members are static. | |
| 71 class AssociateUtils { | |
| 72 public: | |
| 73 static const float kMaxFixedPitchCharAspectRatio; | |
| 74 static const float kMinGap; | |
| 75 | |
| 76 // Returns the outline length of the given blob, computed as: | |
| 77 // rating_cert_scale * rating / certainty | |
| 78 // This works because, in Wordrec::SegSearch() in segsearch.cpp, | |
| 79 // rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale | |
| 80 // and, in Classify::ConvertMatchesToChoices() in adaptmatch.cpp, | |
| 81 // Rating = Certainty = next.rating | |
| 82 // Rating *= rating_scale * Results->BlobLength | |
| 83 // Certainty *= -(getDict().certainty_scale) | |
| 84 static inline float ComputeOutlineLength(float rating_cert_scale, const BLOB_CHOICE &b) { | |
| 85 return rating_cert_scale * b.rating() / b.certainty(); | |
| 86 } | |
| 87 static inline float ComputeRating(float rating_cert_scale, float cert, int width) { | |
| 88 return static_cast<float>(width) * cert / rating_cert_scale; | |
| 89 } | |
| 90 | |
| 91 // Computes character width, gap and seam stats given the | |
| 92 // AssociateStats of the path so far (parent_stats), the col, row of the | |
| 93 // blob that is being added to the path, and the WERD_RES containing | |
| 94 // information about character widths, gaps and seams. | |
| 95 // The combined shape, gap and seam cost of adding a unichar from | |
| 96 // (col, row) to the path is presumably reported through *stats. | |
| 97 // NOTE(review): earlier wording named an associate_cost output, but the declaration below has no such parameter; confirm against associate.cpp. | |
| 98 // Since this function could be used to compute the prioritization for pain points, the (col, row) entry might not be classified yet; | |
| 99 // thus information in the (col, row) entry of the ratings matrix is not used. | |
| 100 // | |
| 101 // Note: the function assumes that the word_res and stats | |
| 102 // pointers are not nullptr. | |
| 103 static void ComputeStats(int col, int row, const AssociateStats *parent_stats, | |
| 104 int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, | |
| 105 WERD_RES *word_res, bool debug, AssociateStats *stats); | |
| 106 | |
| 107 // Returns the width cost for fixed-pitch text. Only declared here; the definition is not part of this header. | |
| 108 static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, | |
| 109 float max_char_wh_ratio); | |
| 110 | |
| 111 // Returns the gap cost for fixed-pitch text (penalizes vertically | |
| 112 // overlapping components): 5.0 when norm_gap < 0.05 and end_pos is false, otherwise 0.0. | |
| 113 static inline float FixedPitchGapCost(float norm_gap, bool end_pos) { | |
| 114 return (norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f; | |
| 115 } | |
| 116 }; | |
| 117 | |
| 118 } // namespace tesseract | |
| 119 | |
| 120 #endif |
