Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccmain/pageiterator.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: pageiterator.cpp | |
| 3 // Description: Iterator for tesseract page structure that avoids using | |
| 4 // tesseract internal data structures. | |
| 5 // Author: Ray Smith | |
| 6 // | |
| 7 // (C) Copyright 2010, Google Inc. | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 #include <allheaders.h> | |
| 21 #include <tesseract/pageiterator.h> | |
| 22 #include "helpers.h" | |
| 23 #include "pageres.h" | |
| 24 #include "tesseractclass.h" | |
| 25 | |
| 26 #include <algorithm> | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 PageIterator::PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, | |
| 31 int scaled_yres, int rect_left, int rect_top, | |
| 32 int rect_width, int rect_height) | |
| 33 : page_res_(page_res), | |
| 34 tesseract_(tesseract), | |
| 35 word_(nullptr), | |
| 36 word_length_(0), | |
| 37 blob_index_(0), | |
| 38 cblob_it_(nullptr), | |
| 39 include_upper_dots_(false), | |
| 40 include_lower_dots_(false), | |
| 41 scale_(scale), | |
| 42 scaled_yres_(scaled_yres), | |
| 43 rect_left_(rect_left), | |
| 44 rect_top_(rect_top), | |
| 45 rect_width_(rect_width), | |
| 46 rect_height_(rect_height) { | |
| 47 it_ = new PAGE_RES_IT(page_res); | |
| 48 PageIterator::Begin(); | |
| 49 } | |
| 50 | |
| 51 PageIterator::~PageIterator() { | |
| 52 delete it_; | |
| 53 delete cblob_it_; | |
| 54 } | |
| 55 | |
| 56 /** | |
| 57 * PageIterators may be copied! This makes it possible to iterate over | |
| 58 * all the objects at a lower level, while maintaining an iterator to | |
| 59 * objects at a higher level. | |
| 60 */ | |
| 61 PageIterator::PageIterator(const PageIterator &src) | |
| 62 : page_res_(src.page_res_), | |
| 63 tesseract_(src.tesseract_), | |
| 64 word_(nullptr), | |
| 65 word_length_(src.word_length_), | |
| 66 blob_index_(src.blob_index_), | |
| 67 cblob_it_(nullptr), | |
| 68 include_upper_dots_(src.include_upper_dots_), | |
| 69 include_lower_dots_(src.include_lower_dots_), | |
| 70 scale_(src.scale_), | |
| 71 scaled_yres_(src.scaled_yres_), | |
| 72 rect_left_(src.rect_left_), | |
| 73 rect_top_(src.rect_top_), | |
| 74 rect_width_(src.rect_width_), | |
| 75 rect_height_(src.rect_height_) { | |
| 76 it_ = new PAGE_RES_IT(*src.it_); | |
| 77 BeginWord(src.blob_index_); | |
| 78 } | |
| 79 | |
| 80 const PageIterator &PageIterator::operator=(const PageIterator &src) { | |
| 81 page_res_ = src.page_res_; | |
| 82 tesseract_ = src.tesseract_; | |
| 83 include_upper_dots_ = src.include_upper_dots_; | |
| 84 include_lower_dots_ = src.include_lower_dots_; | |
| 85 scale_ = src.scale_; | |
| 86 scaled_yres_ = src.scaled_yres_; | |
| 87 rect_left_ = src.rect_left_; | |
| 88 rect_top_ = src.rect_top_; | |
| 89 rect_width_ = src.rect_width_; | |
| 90 rect_height_ = src.rect_height_; | |
| 91 delete it_; | |
| 92 it_ = new PAGE_RES_IT(*src.it_); | |
| 93 BeginWord(src.blob_index_); | |
| 94 return *this; | |
| 95 } | |
| 96 | |
| 97 bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT *other) const { | |
| 98 return (it_ == nullptr && it_ == other) || | |
| 99 ((other != nullptr) && (it_ != nullptr) && (*it_ == *other)); | |
| 100 } | |
| 101 | |
| 102 // ============= Moving around within the page ============. | |
| 103 | |
| 104 /** Resets the iterator to point to the start of the page. */ | |
| 105 void PageIterator::Begin() { | |
| 106 it_->restart_page_with_empties(); | |
| 107 BeginWord(0); | |
| 108 } | |
| 109 | |
| 110 void PageIterator::RestartParagraph() { | |
| 111 if (it_->block() == nullptr) { | |
| 112 return; // At end of the document. | |
| 113 } | |
| 114 PAGE_RES_IT para(page_res_); | |
| 115 PAGE_RES_IT next_para(para); | |
| 116 next_para.forward_paragraph(); | |
| 117 while (next_para.cmp(*it_) <= 0) { | |
| 118 para = next_para; | |
| 119 next_para.forward_paragraph(); | |
| 120 } | |
| 121 *it_ = para; | |
| 122 BeginWord(0); | |
| 123 } | |
| 124 | |
| 125 bool PageIterator::IsWithinFirstTextlineOfParagraph() const { | |
| 126 PageIterator p_start(*this); | |
| 127 p_start.RestartParagraph(); | |
| 128 return p_start.it_->row() == it_->row(); | |
| 129 } | |
| 130 | |
| 131 void PageIterator::RestartRow() { | |
| 132 it_->restart_row(); | |
| 133 BeginWord(0); | |
| 134 } | |
| 135 | |
| 136 /** | |
| 137 * Moves to the start of the next object at the given level in the | |
| 138 * page hierarchy, and returns false if the end of the page was reached. | |
| 139 * NOTE (CHANGED!) that ALL PageIteratorLevel level values will visit each | |
| 140 * non-text block at least once. | |
| 141 * Think of non text blocks as containing a single para, with at least one | |
| 142 * line, with a single imaginary word, containing a single symbol. | |
| 143 * The bounding boxes mark out any polygonal nature of the block, and | |
| 144 * PTIsTextType(BLockType()) is false for non-text blocks. | |
| 145 * Calls to Next with different levels may be freely intermixed. | |
| 146 * This function iterates words in right-to-left scripts correctly, if | |
| 147 * the appropriate language has been loaded into Tesseract. | |
| 148 */ | |
| 149 bool PageIterator::Next(PageIteratorLevel level) { | |
| 150 if (it_->block() == nullptr) { | |
| 151 return false; // Already at the end! | |
| 152 } | |
| 153 if (it_->word() == nullptr) { | |
| 154 level = RIL_BLOCK; | |
| 155 } | |
| 156 | |
| 157 switch (level) { | |
| 158 case RIL_BLOCK: | |
| 159 it_->forward_block(); | |
| 160 break; | |
| 161 case RIL_PARA: | |
| 162 it_->forward_paragraph(); | |
| 163 break; | |
| 164 case RIL_TEXTLINE: | |
| 165 for (it_->forward_with_empties(); it_->row() == it_->prev_row(); | |
| 166 it_->forward_with_empties()) { | |
| 167 ; | |
| 168 } | |
| 169 break; | |
| 170 case RIL_WORD: | |
| 171 it_->forward_with_empties(); | |
| 172 break; | |
| 173 case RIL_SYMBOL: | |
| 174 if (cblob_it_ != nullptr) { | |
| 175 cblob_it_->forward(); | |
| 176 } | |
| 177 ++blob_index_; | |
| 178 if (blob_index_ >= word_length_) { | |
| 179 it_->forward_with_empties(); | |
| 180 } else { | |
| 181 return true; | |
| 182 } | |
| 183 break; | |
| 184 } | |
| 185 BeginWord(0); | |
| 186 return it_->block() != nullptr; | |
| 187 } | |
| 188 | |
| 189 /** | |
| 190 * Returns true if the iterator is at the start of an object at the given | |
| 191 * level. Possible uses include determining if a call to Next(RIL_WORD) | |
| 192 * moved to the start of a RIL_PARA. | |
| 193 */ | |
| 194 bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { | |
| 195 if (it_->block() == nullptr) { | |
| 196 return false; // Already at the end! | |
| 197 } | |
| 198 if (it_->word() == nullptr) { | |
| 199 return true; // In an image block. | |
| 200 } | |
| 201 switch (level) { | |
| 202 case RIL_BLOCK: | |
| 203 return blob_index_ == 0 && it_->block() != it_->prev_block(); | |
| 204 case RIL_PARA: | |
| 205 return blob_index_ == 0 && | |
| 206 (it_->block() != it_->prev_block() || | |
| 207 it_->row()->row->para() != it_->prev_row()->row->para()); | |
| 208 case RIL_TEXTLINE: | |
| 209 return blob_index_ == 0 && it_->row() != it_->prev_row(); | |
| 210 case RIL_WORD: | |
| 211 return blob_index_ == 0; | |
| 212 case RIL_SYMBOL: | |
| 213 return true; | |
| 214 } | |
| 215 return false; | |
| 216 } | |
| 217 | |
| 218 /** | |
| 219 * Returns whether the iterator is positioned at the last element in a | |
| 220 * given level. (e.g. the last word in a line, the last line in a block) | |
| 221 */ | |
| 222 bool PageIterator::IsAtFinalElement(PageIteratorLevel level, | |
| 223 PageIteratorLevel element) const { | |
| 224 if (Empty(element)) { | |
| 225 return true; // Already at the end! | |
| 226 } | |
| 227 // The result is true if we step forward by element and find we are | |
| 228 // at the end of the page or at beginning of *all* levels in: | |
| 229 // [level, element). | |
| 230 // When there is more than one level difference between element and level, | |
| 231 // we could for instance move forward one symbol and still be at the first | |
| 232 // word on a line, so we also have to be at the first symbol in a word. | |
| 233 PageIterator next(*this); | |
| 234 next.Next(element); | |
| 235 if (next.Empty(element)) { | |
| 236 return true; // Reached the end of the page. | |
| 237 } | |
| 238 while (element > level) { | |
| 239 element = static_cast<PageIteratorLevel>(element - 1); | |
| 240 if (!next.IsAtBeginningOf(element)) { | |
| 241 return false; | |
| 242 } | |
| 243 } | |
| 244 return true; | |
| 245 } | |
| 246 | |
| 247 /** | |
| 248 * Returns whether this iterator is positioned | |
| 249 * before other: -1 | |
| 250 * equal to other: 0 | |
| 251 * after other: 1 | |
| 252 */ | |
| 253 int PageIterator::Cmp(const PageIterator &other) const { | |
| 254 int word_cmp = it_->cmp(*other.it_); | |
| 255 if (word_cmp != 0) { | |
| 256 return word_cmp; | |
| 257 } | |
| 258 if (blob_index_ < other.blob_index_) { | |
| 259 return -1; | |
| 260 } | |
| 261 if (blob_index_ == other.blob_index_) { | |
| 262 return 0; | |
| 263 } | |
| 264 return 1; | |
| 265 } | |
| 266 | |
| 267 // ============= Accessing data ==============. | |
| 268 // Coordinate system: | |
| 269 // Integer coordinates are at the cracks between the pixels. | |
| 270 // The top-left corner of the top-left pixel in the image is at (0,0). | |
| 271 // The bottom-right corner of the bottom-right pixel in the image is at | |
| 272 // (width, height). | |
| 273 // Every bounding box goes from the top-left of the top-left contained | |
| 274 // pixel to the bottom-right of the bottom-right contained pixel, so | |
| 275 // the bounding box of the single top-left pixel in the image is: | |
| 276 // (0,0)->(1,1). | |
| 277 // If an image rectangle has been set in the API, then returned coordinates | |
| 278 // relate to the original (full) image, rather than the rectangle. | |
| 279 | |
| 280 /** | |
| 281 * Returns the bounding rectangle of the current object at the given level in | |
| 282 * the coordinates of the working image that is pix_binary(). | |
| 283 * See comment on coordinate system above. | |
| 284 * Returns false if there is no such object at the current position. | |
| 285 */ | |
| 286 bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, | |
| 287 int *top, int *right, | |
| 288 int *bottom) const { | |
| 289 if (Empty(level)) { | |
| 290 return false; | |
| 291 } | |
| 292 TBOX box; | |
| 293 PARA *para = nullptr; | |
| 294 switch (level) { | |
| 295 case RIL_BLOCK: | |
| 296 box = it_->block()->block->restricted_bounding_box(include_upper_dots_, | |
| 297 include_lower_dots_); | |
| 298 break; | |
| 299 case RIL_PARA: | |
| 300 para = it_->row()->row->para(); | |
| 301 // Fall through. | |
| 302 case RIL_TEXTLINE: | |
| 303 box = it_->row()->row->restricted_bounding_box(include_upper_dots_, | |
| 304 include_lower_dots_); | |
| 305 break; | |
| 306 case RIL_WORD: | |
| 307 box = it_->word()->word->restricted_bounding_box(include_upper_dots_, | |
| 308 include_lower_dots_); | |
| 309 break; | |
| 310 case RIL_SYMBOL: | |
| 311 if (cblob_it_ == nullptr) { | |
| 312 box = it_->word()->box_word->BlobBox(blob_index_); | |
| 313 } else { | |
| 314 box = cblob_it_->data()->bounding_box(); | |
| 315 } | |
| 316 } | |
| 317 if (level == RIL_PARA) { | |
| 318 PageIterator other = *this; | |
| 319 other.Begin(); | |
| 320 do { | |
| 321 if (other.it_->block() && | |
| 322 other.it_->block()->block == it_->block()->block && | |
| 323 other.it_->row() && other.it_->row()->row && | |
| 324 other.it_->row()->row->para() == para) { | |
| 325 box = box.bounding_union(other.it_->row()->row->bounding_box()); | |
| 326 } | |
| 327 } while (other.Next(RIL_TEXTLINE)); | |
| 328 } | |
| 329 if (level != RIL_SYMBOL || cblob_it_ != nullptr) { | |
| 330 box.rotate(it_->block()->block->re_rotation()); | |
| 331 } | |
| 332 // Now we have a box in tesseract coordinates relative to the image rectangle, | |
| 333 // we have to convert the coords to a top-down system. | |
| 334 const int pix_height = pixGetHeight(tesseract_->pix_binary()); | |
| 335 const int pix_width = pixGetWidth(tesseract_->pix_binary()); | |
| 336 *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); | |
| 337 *top = ClipToRange(pix_height - box.top(), 0, pix_height); | |
| 338 *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); | |
| 339 *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); | |
| 340 return true; | |
| 341 } | |
| 342 | |
| 343 /** | |
| 344 * Returns the bounding rectangle of the current object at the given level in | |
| 345 * coordinates of the original image. | |
| 346 * See comment on coordinate system above. | |
| 347 * Returns false if there is no such object at the current position. | |
| 348 */ | |
| 349 bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top, | |
| 350 int *right, int *bottom) const { | |
| 351 return BoundingBox(level, 0, left, top, right, bottom); | |
| 352 } | |
| 353 | |
| 354 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, | |
| 355 int *left, int *top, int *right, | |
| 356 int *bottom) const { | |
| 357 if (!BoundingBoxInternal(level, left, top, right, bottom)) { | |
| 358 return false; | |
| 359 } | |
| 360 // Convert to the coordinate system of the original image. | |
| 361 *left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, | |
| 362 rect_left_ + rect_width_); | |
| 363 *top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, | |
| 364 rect_top_ + rect_height_); | |
| 365 *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, | |
| 366 *left, rect_left_ + rect_width_); | |
| 367 *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, | |
| 368 *top, rect_top_ + rect_height_); | |
| 369 return true; | |
| 370 } | |
| 371 | |
| 372 /** Return that there is no such object at a given level. */ | |
| 373 bool PageIterator::Empty(PageIteratorLevel level) const { | |
| 374 if (it_->block() == nullptr) { | |
| 375 return true; // Already at the end! | |
| 376 } | |
| 377 if (it_->word() == nullptr && level != RIL_BLOCK) { | |
| 378 return true; // image block | |
| 379 } | |
| 380 if (level == RIL_SYMBOL && blob_index_ >= word_length_) { | |
| 381 return true; // Zero length word, or already at the end of it. | |
| 382 } | |
| 383 return false; | |
| 384 } | |
| 385 | |
| 386 /** Returns the type of the current block. | |
| 387 * See tesseract/publictypes.h for PolyBlockType. */ | |
| 388 PolyBlockType PageIterator::BlockType() const { | |
| 389 if (it_->block() == nullptr || it_->block()->block == nullptr) { | |
| 390 return PT_UNKNOWN; // Already at the end! | |
| 391 } | |
| 392 if (it_->block()->block->pdblk.poly_block() == nullptr) { | |
| 393 return PT_FLOWING_TEXT; // No layout analysis used - assume text. | |
| 394 } | |
| 395 return it_->block()->block->pdblk.poly_block()->isA(); | |
| 396 } | |
| 397 | |
| 398 /** Returns the polygon outline of the current block. The returned Pta must | |
| 399 * be ptaDestroy-ed after use. */ | |
| 400 Pta *PageIterator::BlockPolygon() const { | |
| 401 if (it_->block() == nullptr || it_->block()->block == nullptr) { | |
| 402 return nullptr; // Already at the end! | |
| 403 } | |
| 404 if (it_->block()->block->pdblk.poly_block() == nullptr) { | |
| 405 return nullptr; // No layout analysis used - no polygon. | |
| 406 } | |
| 407 // Copy polygon, so we can unrotate it to image coordinates. | |
| 408 POLY_BLOCK *internal_poly = it_->block()->block->pdblk.poly_block(); | |
| 409 ICOORDELT_LIST vertices; | |
| 410 vertices.deep_copy(internal_poly->points(), ICOORDELT::deep_copy); | |
| 411 POLY_BLOCK poly(&vertices, internal_poly->isA()); | |
| 412 poly.rotate(it_->block()->block->re_rotation()); | |
| 413 ICOORDELT_IT it(poly.points()); | |
| 414 Pta *pta = ptaCreate(it.length()); | |
| 415 int num_pts = 0; | |
| 416 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) { | |
| 417 ICOORD *pt = it.data(); | |
| 418 // Convert to top-down coords within the input image. | |
| 419 int x = static_cast<float>(pt->x()) / scale_ + rect_left_; | |
| 420 int y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_; | |
| 421 x = ClipToRange(x, rect_left_, rect_left_ + rect_width_); | |
| 422 y = ClipToRange(y, rect_top_, rect_top_ + rect_height_); | |
| 423 ptaAddPt(pta, x, y); | |
| 424 } | |
| 425 return pta; | |
| 426 } | |
| 427 | |
| 428 /** | |
| 429 * Returns a binary image of the current object at the given level. | |
| 430 * The position and size match the return from BoundingBoxInternal, and so this | |
| 431 * could be upscaled with respect to the original input image. | |
| 432 * Use pixDestroy to delete the image after use. | |
| 433 * The following methods are used to generate the images: | |
| 434 * RIL_BLOCK: mask the page image with the block polygon. | |
| 435 * RIL_TEXTLINE: Clip the rectangle of the line box from the page image. | |
| 436 * TODO(rays) fix this to generate and use a line polygon. | |
| 437 * RIL_WORD: Clip the rectangle of the word box from the page image. | |
| 438 * RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior | |
| 439 * to recognition) or the bounding box otherwise. | |
| 440 * A reconstruction of the original image (using xor to check for double | |
| 441 * representation) should be reasonably accurate, | |
| 442 * apart from removed noise, at the block level. Below the block level, the | |
| 443 * reconstruction will be missing images and line separators. | |
| 444 * At the symbol level, kerned characters will be invade the bounding box | |
| 445 * if rendered after recognition, making an xor reconstruction inaccurate, but | |
| 446 * an or construction better. Before recognition, symbol-level reconstruction | |
| 447 * should be good, even with xor, since the images come from the connected | |
| 448 * components. | |
| 449 */ | |
| 450 Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const { | |
| 451 int left, top, right, bottom; | |
| 452 if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) { | |
| 453 return nullptr; | |
| 454 } | |
| 455 if (level == RIL_SYMBOL && cblob_it_ != nullptr && | |
| 456 cblob_it_->data()->area() != 0) { | |
| 457 return cblob_it_->data()->render(); | |
| 458 } | |
| 459 Box *box = boxCreate(left, top, right - left, bottom - top); | |
| 460 Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr); | |
| 461 boxDestroy(&box); | |
| 462 if (level == RIL_BLOCK || level == RIL_PARA) { | |
| 463 // Clip to the block polygon as well. | |
| 464 TBOX mask_box; | |
| 465 Image mask = it_->block()->block->render_mask(&mask_box); | |
| 466 int mask_x = left - mask_box.left(); | |
| 467 int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); | |
| 468 // AND the mask and pix, putting the result in pix. | |
| 469 pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), | |
| 470 pixGetWidth(pix), pixGetHeight(pix), PIX_SRC & PIX_DST, mask, | |
| 471 std::max(0, mask_x), std::max(0, mask_y)); | |
| 472 mask.destroy(); | |
| 473 } | |
| 474 return pix; | |
| 475 } | |
| 476 | |
| 477 /** | |
| 478 * Returns an image of the current object at the given level in greyscale | |
| 479 * if available in the input. To guarantee a binary image use BinaryImage. | |
| 480 * NOTE that in order to give the best possible image, the bounds are | |
| 481 * expanded slightly over the binary connected component, by the supplied | |
| 482 * padding, so the top-left position of the returned image is returned | |
| 483 * in (left,top). These will most likely not match the coordinates | |
| 484 * returned by BoundingBox. | |
| 485 * If you do not supply an original image, you will get a binary one. | |
| 486 * Use pixDestroy to delete the image after use. | |
| 487 */ | |
| 488 Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, | |
| 489 Pix *original_img, int *left, int *top) const { | |
| 490 int right, bottom; | |
| 491 if (!BoundingBox(level, left, top, &right, &bottom)) { | |
| 492 return nullptr; | |
| 493 } | |
| 494 if (original_img == nullptr) { | |
| 495 return GetBinaryImage(level); | |
| 496 } | |
| 497 | |
| 498 // Expand the box. | |
| 499 *left = std::max(*left - padding, 0); | |
| 500 *top = std::max(*top - padding, 0); | |
| 501 right = std::min(right + padding, rect_width_); | |
| 502 bottom = std::min(bottom + padding, rect_height_); | |
| 503 Box *box = boxCreate(*left, *top, right - *left, bottom - *top); | |
| 504 Image grey_pix = pixClipRectangle(original_img, box, nullptr); | |
| 505 boxDestroy(&box); | |
| 506 if (level == RIL_BLOCK || level == RIL_PARA) { | |
| 507 // Clip to the block polygon as well. | |
| 508 TBOX mask_box; | |
| 509 Image mask = it_->block()->block->render_mask(&mask_box); | |
| 510 // Copy the mask registered correctly into an image the size of grey_pix. | |
| 511 int mask_x = *left - mask_box.left(); | |
| 512 int mask_y = *top - (pixGetHeight(original_img) - mask_box.top()); | |
| 513 int width = pixGetWidth(grey_pix); | |
| 514 int height = pixGetHeight(grey_pix); | |
| 515 Image resized_mask = pixCreate(width, height, 1); | |
| 516 pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, | |
| 517 height, PIX_SRC, mask, std::max(0, mask_x), | |
| 518 std::max(0, mask_y)); | |
| 519 mask.destroy(); | |
| 520 pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, | |
| 521 2 * padding + 1); | |
| 522 pixInvert(resized_mask, resized_mask); | |
| 523 pixSetMasked(grey_pix, resized_mask, UINT32_MAX); | |
| 524 resized_mask.destroy(); | |
| 525 } | |
| 526 return grey_pix; | |
| 527 } | |
| 528 | |
| 529 /** | |
| 530 * Returns the baseline of the current object at the given level. | |
| 531 * The baseline is the line that passes through (x1, y1) and (x2, y2). | |
| 532 * WARNING: with vertical text, baselines may be vertical! | |
| 533 */ | |
| 534 bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, | |
| 535 int *y2) const { | |
| 536 if (it_->word() == nullptr) { | |
| 537 return false; // Already at the end! | |
| 538 } | |
| 539 ROW *row = it_->row()->row; | |
| 540 WERD *word = it_->word()->word; | |
| 541 TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box() | |
| 542 : row->bounding_box(); | |
| 543 int left = box.left(); | |
| 544 ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5)); | |
| 545 int right = box.right(); | |
| 546 ICOORD endpt(right, static_cast<int16_t>(row->base_line(right) + 0.5)); | |
| 547 // Rotate to image coordinates and convert to global image coords. | |
| 548 startpt.rotate(it_->block()->block->re_rotation()); | |
| 549 endpt.rotate(it_->block()->block->re_rotation()); | |
| 550 *x1 = startpt.x() / scale_ + rect_left_; | |
| 551 *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; | |
| 552 *x2 = endpt.x() / scale_ + rect_left_; | |
| 553 *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; | |
| 554 return true; | |
| 555 } | |
| 556 | |
| 557 void PageIterator::RowAttributes(float *row_height, float *descenders, | |
| 558 float *ascenders) const { | |
| 559 *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() - | |
| 560 it_->row()->row->descenders(); | |
| 561 *descenders = it_->row()->row->descenders(); | |
| 562 *ascenders = it_->row()->row->ascenders(); | |
| 563 } | |
| 564 | |
| 565 void PageIterator::Orientation(tesseract::Orientation *orientation, | |
| 566 tesseract::WritingDirection *writing_direction, | |
| 567 tesseract::TextlineOrder *textline_order, | |
| 568 float *deskew_angle) const { | |
| 569 auto *block_res = it_->block(); | |
| 570 if (block_res == nullptr) { | |
| 571 // Nothing can be done, so return default values. | |
| 572 *orientation = ORIENTATION_PAGE_UP; | |
| 573 *writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT; | |
| 574 *textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM; | |
| 575 return; | |
| 576 } | |
| 577 auto *block = block_res->block; | |
| 578 | |
| 579 // Orientation | |
| 580 FCOORD up_in_image(0.0, 1.0); | |
| 581 up_in_image.unrotate(block->classify_rotation()); | |
| 582 up_in_image.rotate(block->re_rotation()); | |
| 583 | |
| 584 if (up_in_image.x() == 0.0F) { | |
| 585 if (up_in_image.y() > 0.0F) { | |
| 586 *orientation = ORIENTATION_PAGE_UP; | |
| 587 } else { | |
| 588 *orientation = ORIENTATION_PAGE_DOWN; | |
| 589 } | |
| 590 } else if (up_in_image.x() > 0.0F) { | |
| 591 *orientation = ORIENTATION_PAGE_RIGHT; | |
| 592 } else { | |
| 593 *orientation = ORIENTATION_PAGE_LEFT; | |
| 594 } | |
| 595 | |
| 596 // Writing direction | |
| 597 bool is_vertical_text = (block->classify_rotation().x() == 0.0); | |
| 598 bool right_to_left = block->right_to_left(); | |
| 599 *writing_direction = is_vertical_text | |
| 600 ? WRITING_DIRECTION_TOP_TO_BOTTOM | |
| 601 : (right_to_left ? WRITING_DIRECTION_RIGHT_TO_LEFT | |
| 602 : WRITING_DIRECTION_LEFT_TO_RIGHT); | |
| 603 | |
| 604 // Textline Order | |
| 605 const bool is_mongolian = false; // TODO(eger): fix me | |
| 606 *textline_order = is_vertical_text | |
| 607 ? (is_mongolian ? TEXTLINE_ORDER_LEFT_TO_RIGHT | |
| 608 : TEXTLINE_ORDER_RIGHT_TO_LEFT) | |
| 609 : TEXTLINE_ORDER_TOP_TO_BOTTOM; | |
| 610 | |
| 611 // Deskew angle | |
| 612 FCOORD skew = block->skew(); // true horizontal for textlines | |
| 613 *deskew_angle = -skew.angle(); | |
| 614 } | |
| 615 | |
| 616 void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, | |
| 617 bool *is_list_item, bool *is_crown, | |
| 618 int *first_line_indent) const { | |
| 619 *just = tesseract::JUSTIFICATION_UNKNOWN; | |
| 620 if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || | |
| 621 !it_->row()->row->para()->model) { | |
| 622 return; | |
| 623 } | |
| 624 | |
| 625 PARA *para = it_->row()->row->para(); | |
| 626 *is_list_item = para->is_list_item; | |
| 627 *is_crown = para->is_very_first_or_continuation; | |
| 628 *first_line_indent = para->model->first_indent() - para->model->body_indent(); | |
| 629 *just = para->model->justification(); | |
| 630 } | |
| 631 | |
| 632 /** | |
| 633 * Sets up the internal data for iterating the blobs of a new word, then | |
| 634 * moves the iterator to the given offset. | |
| 635 */ | |
| 636 void PageIterator::BeginWord(int offset) { | |
| 637 WERD_RES *word_res = it_->word(); | |
| 638 if (word_res == nullptr) { | |
| 639 // This is a non-text block, so there is no word. | |
| 640 word_length_ = 0; | |
| 641 blob_index_ = 0; | |
| 642 word_ = nullptr; | |
| 643 return; | |
| 644 } | |
| 645 if (word_res->best_choice != nullptr) { | |
| 646 // Recognition has been done, so we are using the box_word, which | |
| 647 // is already baseline denormalized. | |
| 648 word_length_ = word_res->best_choice->length(); | |
| 649 if (word_res->box_word != nullptr) { | |
| 650 if (word_res->box_word->length() != static_cast<unsigned>(word_length_)) { | |
| 651 tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", | |
| 652 word_length_, word_res->best_choice->unichar_string().c_str(), | |
| 653 word_res->box_word->length()); | |
| 654 word_res->box_word->bounding_box().print(); | |
| 655 } | |
| 656 ASSERT_HOST(word_res->box_word->length() == | |
| 657 static_cast<unsigned>(word_length_)); | |
| 658 } | |
| 659 word_ = nullptr; | |
| 660 // We will be iterating the box_word. | |
| 661 delete cblob_it_; | |
| 662 cblob_it_ = nullptr; | |
| 663 } else { | |
| 664 // No recognition yet, so a "symbol" is a cblob. | |
| 665 word_ = word_res->word; | |
| 666 ASSERT_HOST(word_->cblob_list() != nullptr); | |
| 667 word_length_ = word_->cblob_list()->length(); | |
| 668 if (cblob_it_ == nullptr) { | |
| 669 cblob_it_ = new C_BLOB_IT; | |
| 670 } | |
| 671 cblob_it_->set_to_list(word_->cblob_list()); | |
| 672 } | |
| 673 for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { | |
| 674 if (cblob_it_ != nullptr) { | |
| 675 cblob_it_->forward(); | |
| 676 } | |
| 677 } | |
| 678 } | |
| 679 | |
| 680 bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) { | |
| 681 if (it_->word() != nullptr) { | |
| 682 it_->word()->blamer_bundle = blamer_bundle; | |
| 683 return true; | |
| 684 } else { | |
| 685 return false; | |
| 686 } | |
| 687 } | |
| 688 | |
| 689 } // namespace tesseract. |
