Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/boxword.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: boxword.cpp | |
| 3 // Description: Class to represent the bounding boxes of the output. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2010, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #include "boxword.h" | |
| 20 #include "blobs.h" | |
| 21 #include "host.h" // for NearlyEqual | |
| 22 #include "normalis.h" | |
| 23 #include "ocrblock.h" | |
| 24 #include "pageres.h" | |
| 25 | |
| 26 namespace tesseract { | |
| 27 | |
// Largest difference allowed between an edge of an output box and the
// corresponding edge of the matching input blob box for the output edge to
// be snapped onto the blob edge. Edges that differ by more than this are
// left alone, since the blob may have been chopped, in which case only the
// word bounding box can be used for clipping.
constexpr int kBoxClipTolerance = 2;
| 32 | |
| 33 BoxWord::BoxWord() : length_(0) {} | |
| 34 | |
| 35 BoxWord::BoxWord(const BoxWord &src) { | |
| 36 CopyFrom(src); | |
| 37 } | |
| 38 | |
| 39 BoxWord &BoxWord::operator=(const BoxWord &src) { | |
| 40 CopyFrom(src); | |
| 41 return *this; | |
| 42 } | |
| 43 | |
| 44 void BoxWord::CopyFrom(const BoxWord &src) { | |
| 45 bbox_ = src.bbox_; | |
| 46 length_ = src.length_; | |
| 47 boxes_.clear(); | |
| 48 boxes_.reserve(length_); | |
| 49 for (unsigned i = 0; i < length_; ++i) { | |
| 50 boxes_.push_back(src.boxes_[i]); | |
| 51 } | |
| 52 } | |
| 53 | |
| 54 // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to | |
| 55 // switch back to original image coordinates. | |
| 56 BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) { | |
| 57 auto *boxword = new BoxWord(); | |
| 58 // Count the blobs. | |
| 59 boxword->length_ = tessword->NumBlobs(); | |
| 60 // Allocate memory. | |
| 61 boxword->boxes_.reserve(boxword->length_); | |
| 62 | |
| 63 for (unsigned b = 0; b < boxword->length_; ++b) { | |
| 64 TBLOB *tblob = tessword->blobs[b]; | |
| 65 TBOX blob_box; | |
| 66 for (TESSLINE *outline = tblob->outlines; outline != nullptr; | |
| 67 outline = outline->next) { | |
| 68 EDGEPT *edgept = outline->loop; | |
| 69 // Iterate over the edges. | |
| 70 do { | |
| 71 if (!edgept->IsHidden() || !edgept->prev->IsHidden()) { | |
| 72 ICOORD pos(edgept->pos.x, edgept->pos.y); | |
| 73 TPOINT denormed; | |
| 74 tblob->denorm().DenormTransform(nullptr, edgept->pos, &denormed); | |
| 75 pos.set_x(denormed.x); | |
| 76 pos.set_y(denormed.y); | |
| 77 TBOX pt_box(pos, pos); | |
| 78 blob_box += pt_box; | |
| 79 } | |
| 80 edgept = edgept->next; | |
| 81 } while (edgept != outline->loop); | |
| 82 } | |
| 83 boxword->boxes_.push_back(blob_box); | |
| 84 } | |
| 85 boxword->ComputeBoundingBox(); | |
| 86 return boxword; | |
| 87 } | |
| 88 | |
| 89 // Clean up the bounding boxes from the polygonal approximation by | |
| 90 // expanding slightly, then clipping to the blobs from the original_word | |
| 91 // that overlap. If not null, the block provides the inverse rotation. | |
| 92 void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) { | |
| 93 for (unsigned i = 0; i < length_; ++i) { | |
| 94 TBOX box = boxes_[i]; | |
| 95 // Expand by a single pixel, as the poly approximation error is 1 pixel. | |
| 96 box = | |
| 97 TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1); | |
| 98 // Now find the original box that matches. | |
| 99 TBOX original_box; | |
| 100 C_BLOB_IT b_it(original_word->cblob_list()); | |
| 101 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { | |
| 102 TBOX blob_box = b_it.data()->bounding_box(); | |
| 103 if (block != nullptr) { | |
| 104 blob_box.rotate(block->re_rotation()); | |
| 105 } | |
| 106 if (blob_box.major_overlap(box)) { | |
| 107 original_box += blob_box; | |
| 108 } | |
| 109 } | |
| 110 if (!original_box.null_box()) { | |
| 111 if (NearlyEqual<int>(original_box.left(), box.left(), | |
| 112 kBoxClipTolerance)) { | |
| 113 box.set_left(original_box.left()); | |
| 114 } | |
| 115 if (NearlyEqual<int>(original_box.right(), box.right(), | |
| 116 kBoxClipTolerance)) { | |
| 117 box.set_right(original_box.right()); | |
| 118 } | |
| 119 if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) { | |
| 120 box.set_top(original_box.top()); | |
| 121 } | |
| 122 if (NearlyEqual<int>(original_box.bottom(), box.bottom(), | |
| 123 kBoxClipTolerance)) { | |
| 124 box.set_bottom(original_box.bottom()); | |
| 125 } | |
| 126 } | |
| 127 original_box = original_word->bounding_box(); | |
| 128 if (block != nullptr) { | |
| 129 original_box.rotate(block->re_rotation()); | |
| 130 } | |
| 131 boxes_[i] = box.intersection(original_box); | |
| 132 } | |
| 133 ComputeBoundingBox(); | |
| 134 } | |
| 135 | |
| 136 // Merges the boxes from start to end, not including end, and deletes | |
| 137 // the boxes between start and end. | |
| 138 void BoxWord::MergeBoxes(unsigned start, unsigned end) { | |
| 139 start = ClipToRange(start, 0U, length_); | |
| 140 end = ClipToRange(end, 0U, length_); | |
| 141 if (end <= start + 1) { | |
| 142 return; | |
| 143 } | |
| 144 for (unsigned i = start + 1; i < end; ++i) { | |
| 145 boxes_[start] += boxes_[i]; | |
| 146 } | |
| 147 int shrinkage = end - 1 - start; | |
| 148 length_ -= shrinkage; | |
| 149 for (unsigned i = start + 1; i < length_; ++i) { | |
| 150 boxes_[i] = boxes_[i + shrinkage]; | |
| 151 } | |
| 152 boxes_.resize(length_); | |
| 153 } | |
| 154 | |
| 155 // Inserts a new box before the given index. | |
| 156 // Recomputes the bounding box. | |
| 157 void BoxWord::InsertBox(unsigned index, const TBOX &box) { | |
| 158 if (index < length_) { | |
| 159 boxes_.insert(boxes_.begin() + index, box); | |
| 160 } else { | |
| 161 boxes_.push_back(box); | |
| 162 } | |
| 163 length_ = boxes_.size(); | |
| 164 ComputeBoundingBox(); | |
| 165 } | |
| 166 | |
| 167 // Changes the box at the given index to the new box. | |
| 168 // Recomputes the bounding box. | |
| 169 void BoxWord::ChangeBox(unsigned index, const TBOX &box) { | |
| 170 boxes_[index] = box; | |
| 171 ComputeBoundingBox(); | |
| 172 } | |
| 173 | |
| 174 // Deletes the box with the given index, and shuffles up the rest. | |
| 175 // Recomputes the bounding box. | |
| 176 void BoxWord::DeleteBox(unsigned index) { | |
| 177 ASSERT_HOST(index < length_); | |
| 178 boxes_.erase(boxes_.begin() + index); | |
| 179 --length_; | |
| 180 ComputeBoundingBox(); | |
| 181 } | |
| 182 | |
| 183 // Deletes all the boxes stored in BoxWord. | |
| 184 void BoxWord::DeleteAllBoxes() { | |
| 185 length_ = 0; | |
| 186 boxes_.clear(); | |
| 187 bbox_ = TBOX(); | |
| 188 } | |
| 189 | |
| 190 // Computes the bounding box of the word. | |
| 191 void BoxWord::ComputeBoundingBox() { | |
| 192 bbox_ = TBOX(); | |
| 193 for (unsigned i = 0; i < length_; ++i) { | |
| 194 bbox_ += boxes_[i]; | |
| 195 } | |
| 196 } | |
| 197 | |
| 198 // This and other putatively are the same, so call the (permanent) callback | |
| 199 // for each blob index where the bounding boxes match. | |
| 200 // The callback is deleted on completion. | |
| 201 void BoxWord::ProcessMatchedBlobs(const TWERD &other, | |
| 202 const std::function<void(int)> &cb) const { | |
| 203 for (unsigned i = 0; i < length_ && i < other.NumBlobs(); ++i) { | |
| 204 TBOX blob_box = other.blobs[i]->bounding_box(); | |
| 205 if (blob_box == boxes_[i]) { | |
| 206 cb(i); | |
| 207 } | |
| 208 } | |
| 209 } | |
| 210 | |
| 211 } // namespace tesseract. |
