Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/cjkpitch.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: cjkpitch.cpp | |
| 3 // Description: Code to determine fixed pitchness and the pitch if fixed, | |
| 4 // for CJK text. | |
| 5 // Author: takenaka@google.com (Hiroshi Takenaka) | |
| 6 // | |
| 7 // Copyright 2011 Google Inc. All Rights Reserved. | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 #include "cjkpitch.h" | |
| 21 #include "topitch.h" | |
| 22 #include "tovars.h" | |
| 23 | |
| 24 #include <algorithm> // for std::sort | |
| 25 #include <cmath> | |
| 26 #include <vector> // for std::vector | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 static BOOL_VAR(textord_space_size_is_variable, false, | |
| 31 "If true, word delimiter spaces are assumed to have " | |
| 32 "variable width, even though characters have fixed pitch."); | |
| 33 | |
| 34 // Allow +/-10% error for character pitch / body size. | |
| 35 static const float kFPTolerance = 0.1f; | |
| 36 | |
| 37 // Minimum ratio of "good" character pitch for a row to be considered | |
| 38 // to be fixed-pitch. | |
| 39 static const float kFixedPitchThreshold = 0.35f; | |
| 40 | |
| 41 // rank statistics for a small collection of float values. | |
| 42 class SimpleStats { | |
| 43 public: | |
| 44 SimpleStats() = default; | |
| 45 ~SimpleStats() = default; | |
| 46 | |
| 47 void Clear() { | |
| 48 values_.clear(); | |
| 49 finalized_ = false; | |
| 50 } | |
| 51 | |
| 52 void Add(float value) { | |
| 53 values_.push_back(value); | |
| 54 finalized_ = false; | |
| 55 } | |
| 56 | |
| 57 void Finish() { | |
| 58 std::sort(values_.begin(), values_.end()); | |
| 59 finalized_ = true; | |
| 60 } | |
| 61 | |
| 62 float ile(double frac) { | |
| 63 if (!finalized_) { | |
| 64 Finish(); | |
| 65 } | |
| 66 if (values_.empty()) { | |
| 67 return 0.0f; | |
| 68 } | |
| 69 if (frac >= 1.0) { | |
| 70 return values_.back(); | |
| 71 } | |
| 72 if (frac <= 0.0 || values_.size() == 1) { | |
| 73 return values_[0]; | |
| 74 } | |
| 75 int index = static_cast<int>((values_.size() - 1) * frac); | |
| 76 float reminder = (values_.size() - 1) * frac - index; | |
| 77 | |
| 78 return values_[index] * (1.0f - reminder) + values_[index + 1] * reminder; | |
| 79 } | |
| 80 | |
| 81 float median() { | |
| 82 return ile(0.5); | |
| 83 } | |
| 84 | |
| 85 float minimum() { | |
| 86 if (!finalized_) { | |
| 87 Finish(); | |
| 88 } | |
| 89 if (values_.empty()) { | |
| 90 return 0.0f; | |
| 91 } | |
| 92 return values_[0]; | |
| 93 } | |
| 94 | |
| 95 bool empty() const { | |
| 96 return values_.empty(); | |
| 97 } | |
| 98 | |
| 99 int size() const { | |
| 100 return values_.size(); | |
| 101 } | |
| 102 | |
| 103 private: | |
| 104 bool finalized_ = false; | |
| 105 std::vector<float> values_; | |
| 106 }; | |
| 107 | |
| 108 // statistics for a small collection of float pairs (x, y). | |
| 109 // EstimateYFor(x, r) returns the estimated y at x, based on | |
| 110 // existing samples between x*(1-r) ~ x*(1+r). | |
| 111 class LocalCorrelation { | |
| 112 public: | |
| 113 struct float_pair { | |
| 114 float x, y; | |
| 115 int vote; | |
| 116 }; | |
| 117 | |
| 118 LocalCorrelation() : finalized_(false) {} | |
| 119 ~LocalCorrelation() = default; | |
| 120 | |
| 121 void Finish() { | |
| 122 std::sort(values_.begin(), values_.end(), float_pair_compare); | |
| 123 finalized_ = true; | |
| 124 } | |
| 125 | |
| 126 void Clear() { | |
| 127 finalized_ = false; | |
| 128 } | |
| 129 | |
| 130 void Add(float x, float y, int v) { | |
| 131 struct float_pair value; | |
| 132 value.x = x; | |
| 133 value.y = y; | |
| 134 value.vote = v; | |
| 135 values_.push_back(value); | |
| 136 finalized_ = false; | |
| 137 } | |
| 138 | |
| 139 float EstimateYFor(float x, float r) { | |
| 140 ASSERT_HOST(finalized_); | |
| 141 unsigned start = 0, end = values_.size(); | |
| 142 // Because the number of samples (used_) is assumed to be small, | |
| 143 // just use linear search to find values within the range. | |
| 144 while (start < values_.size() && values_[start].x < x * (1 - r)) { | |
| 145 start++; | |
| 146 } | |
| 147 while (end > 0 && values_[end - 1].x > x * (1 + r)) { | |
| 148 end--; | |
| 149 } | |
| 150 | |
| 151 // Fall back to the global average if there are no data within r | |
| 152 // of x. | |
| 153 if (start >= end) { | |
| 154 start = 0; | |
| 155 end = values_.size(); | |
| 156 } | |
| 157 | |
| 158 // Compute weighted average of the values. | |
| 159 float rc = 0; | |
| 160 int vote = 0; | |
| 161 for (auto i = start; i < end; i++) { | |
| 162 rc += values_[i].vote * x * values_[i].y / values_[i].x; | |
| 163 vote += values_[i].vote; | |
| 164 } | |
| 165 | |
| 166 return vote == 0 ? 0.0f : rc / vote; | |
| 167 } | |
| 168 | |
| 169 private: | |
| 170 static bool float_pair_compare(const float_pair f_a, const float_pair f_b) { | |
| 171 return f_a.x < f_b.x; | |
| 172 } | |
| 173 | |
| 174 bool finalized_; | |
| 175 std::vector<struct float_pair> values_; | |
| 176 }; | |
| 177 | |
| 178 // Class to represent a character on a fixed pitch row. A FPChar may | |
| 179 // consist of multiple blobs (BLOBNBOX's). | |
| 180 class FPChar { | |
| 181 public: | |
| 182 enum Alignment { ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD }; | |
| 183 | |
| 184 FPChar() | |
| 185 : box_() | |
| 186 , real_body_() | |
| 187 , from_(nullptr) | |
| 188 , to_(nullptr) | |
| 189 , num_blobs_(0) | |
| 190 , max_gap_(0) | |
| 191 , final_(false) | |
| 192 , alignment_(ALIGN_UNKNOWN) | |
| 193 , merge_to_prev_(false) | |
| 194 , delete_flag_(false) {} | |
| 195 | |
| 196 // Initialize from blob. | |
| 197 void Init(BLOBNBOX *blob) { | |
| 198 box_ = blob->bounding_box(); | |
| 199 real_body_ = box_; | |
| 200 from_ = to_ = blob; | |
| 201 num_blobs_ = 1; | |
| 202 } | |
| 203 | |
| 204 // Merge this character with "next". The "next" character should | |
| 205 // consist of succeeding blobs on the same row. | |
| 206 void Merge(const FPChar &next) { | |
| 207 int gap = real_body_.x_gap(next.real_body_); | |
| 208 if (gap > max_gap_) { | |
| 209 max_gap_ = gap; | |
| 210 } | |
| 211 | |
| 212 box_ += next.box_; | |
| 213 real_body_ += next.real_body_; | |
| 214 to_ = next.to_; | |
| 215 num_blobs_ += next.num_blobs_; | |
| 216 } | |
| 217 | |
| 218 // Accessors. | |
| 219 const TBOX &box() const { | |
| 220 return box_; | |
| 221 } | |
| 222 void set_box(const TBOX &box) { | |
| 223 box_ = box; | |
| 224 } | |
| 225 const TBOX &real_body() const { | |
| 226 return real_body_; | |
| 227 } | |
| 228 | |
| 229 bool is_final() const { | |
| 230 return final_; | |
| 231 } | |
| 232 void set_final(bool flag) { | |
| 233 final_ = flag; | |
| 234 } | |
| 235 | |
| 236 const Alignment &alignment() const { | |
| 237 return alignment_; | |
| 238 } | |
| 239 void set_alignment(Alignment alignment) { | |
| 240 alignment_ = alignment; | |
| 241 } | |
| 242 | |
| 243 bool merge_to_prev() const { | |
| 244 return merge_to_prev_; | |
| 245 } | |
| 246 void set_merge_to_prev(bool flag) { | |
| 247 merge_to_prev_ = flag; | |
| 248 } | |
| 249 | |
| 250 bool delete_flag() const { | |
| 251 return delete_flag_; | |
| 252 } | |
| 253 void set_delete_flag(bool flag) { | |
| 254 delete_flag_ = flag; | |
| 255 } | |
| 256 | |
| 257 int max_gap() const { | |
| 258 return max_gap_; | |
| 259 } | |
| 260 | |
| 261 int num_blobs() const { | |
| 262 return num_blobs_; | |
| 263 } | |
| 264 | |
| 265 private: | |
| 266 TBOX box_; // Rectangle region considered to be occupied by this | |
| 267 // character. It could be bigger than the bounding box. | |
| 268 TBOX real_body_; // Real bounding box of this character. | |
| 269 BLOBNBOX *from_; // The first blob of this character. | |
| 270 BLOBNBOX *to_; // The last blob of this character. | |
| 271 int num_blobs_; // Number of blobs that belong to this character. | |
| 272 int max_gap_; // Maximum x gap between the blobs. | |
| 273 | |
| 274 bool final_; // True if alignment/fragmentation decision for this | |
| 275 // character is finalized. | |
| 276 | |
| 277 Alignment alignment_; // Alignment status. | |
| 278 bool merge_to_prev_; // True if this is a fragmented blob that | |
| 279 // needs to be merged to the previous | |
| 280 // character. | |
| 281 | |
| 282 int delete_flag_; // True if this character is merged to another | |
| 283 // one and needs to be deleted. | |
| 284 }; | |
| 285 | |
| 286 // Class to represent a fixed pitch row, as a linear collection of | |
| 287 // FPChar's. | |
| 288 class FPRow { | |
| 289 public: | |
| 290 FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), heights_(), characters_() {} | |
| 291 | |
| 292 ~FPRow() = default; | |
| 293 | |
| 294 // Initialize from TD_ROW. | |
| 295 void Init(TO_ROW *row); | |
| 296 | |
| 297 // Estimate character pitch of this row, based on current alignment | |
| 298 // status of underlying FPChar's. The argument pass1 can be set to | |
| 299 // true if the function is called after Pass1Analyze(), to eliminate | |
| 300 // some redundant computation. | |
| 301 void EstimatePitch(bool pass1); | |
| 302 | |
| 303 // Check each character if it has good character pitches between its | |
| 304 // predecessor and its successor and set its alignment status. If | |
| 305 // we already calculated the estimated pitch for this row, the value | |
| 306 // is used. If we didn't, a character is considered to be good, if | |
| 307 // the pitches between its predecessor and its successor are almost | |
| 308 // equal. | |
| 309 void Pass1Analyze(); | |
| 310 | |
| 311 // Find characters that fit nicely into one imaginary body next to a | |
| 312 // character which is already finalized. Then mark them as character | |
| 313 // fragments. | |
| 314 bool Pass2Analyze(); | |
| 315 | |
| 316 // Merge FPChars marked as character fragments into one. | |
| 317 void MergeFragments(); | |
| 318 | |
| 319 // Finalize characters that are already large enough and cannot be | |
| 320 // merged with others any more. | |
| 321 void FinalizeLargeChars(); | |
| 322 | |
| 323 // Output pitch estimation results to attributes of TD_ROW. | |
| 324 void OutputEstimations(); | |
| 325 | |
| 326 void DebugOutputResult(int row_index); | |
| 327 | |
| 328 int good_pitches() { | |
| 329 return good_pitches_.size(); | |
| 330 } | |
| 331 | |
| 332 float pitch() { | |
| 333 return pitch_; | |
| 334 } | |
| 335 | |
| 336 float estimated_pitch() { | |
| 337 return estimated_pitch_; | |
| 338 } | |
| 339 | |
| 340 void set_estimated_pitch(float v) { | |
| 341 estimated_pitch_ = v; | |
| 342 } | |
| 343 | |
| 344 float height() { | |
| 345 return height_; | |
| 346 } | |
| 347 | |
| 348 float height_pitch_ratio() { | |
| 349 if (good_pitches_.size() < 2) { | |
| 350 return -1.0; | |
| 351 } | |
| 352 return height_ / good_pitches_.median(); | |
| 353 } | |
| 354 | |
| 355 float gap() { | |
| 356 return gap_; | |
| 357 } | |
| 358 | |
| 359 size_t num_chars() { | |
| 360 return characters_.size(); | |
| 361 } | |
| 362 FPChar *character(int i) { | |
| 363 return &characters_[i]; | |
| 364 } | |
| 365 | |
| 366 const TBOX &box(int i) { | |
| 367 return characters_[i].box(); | |
| 368 } | |
| 369 | |
| 370 const TBOX &real_body(int i) { | |
| 371 return characters_[i].real_body(); | |
| 372 } | |
| 373 | |
| 374 bool is_box_modified(int i) { | |
| 375 return !(characters_[i].box() == characters_[i].real_body()); | |
| 376 } | |
| 377 | |
| 378 float center_x(int i) { | |
| 379 return (characters_[i].box().left() + characters_[i].box().right()) / 2.0; | |
| 380 } | |
| 381 | |
| 382 bool is_final(int i) { | |
| 383 return characters_[i].is_final(); | |
| 384 } | |
| 385 | |
| 386 void finalize(int i) { | |
| 387 characters_[i].set_final(true); | |
| 388 } | |
| 389 | |
| 390 bool is_good(int i) { | |
| 391 return characters_[i].alignment() == FPChar::ALIGN_GOOD; | |
| 392 } | |
| 393 | |
| 394 void mark_good(int i) { | |
| 395 characters_[i].set_alignment(FPChar::ALIGN_GOOD); | |
| 396 } | |
| 397 | |
| 398 void mark_bad(int i) { | |
| 399 characters_[i].set_alignment(FPChar::ALIGN_BAD); | |
| 400 } | |
| 401 | |
| 402 void clear_alignment(int i) { | |
| 403 characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN); | |
| 404 } | |
| 405 | |
| 406 private: | |
| 407 static float x_overlap_fraction(const TBOX &box1, const TBOX &box2) { | |
| 408 if (std::min(box1.width(), box2.width()) == 0) { | |
| 409 return 0.0; | |
| 410 } | |
| 411 return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width())); | |
| 412 } | |
| 413 | |
| 414 static bool mostly_overlap(const TBOX &box1, const TBOX &box2) { | |
| 415 return x_overlap_fraction(box1, box2) > 0.9; | |
| 416 } | |
| 417 | |
| 418 static bool significant_overlap(const TBOX &box1, const TBOX &box2) { | |
| 419 if (std::min(box1.width(), box2.width()) == 0) { | |
| 420 return false; | |
| 421 } | |
| 422 int overlap = -box1.x_gap(box2); | |
| 423 return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1; | |
| 424 } | |
| 425 | |
| 426 static float box_pitch(const TBOX &ref, const TBOX &box) { | |
| 427 return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0; | |
| 428 } | |
| 429 | |
| 430 // Check if two neighboring characters satisfy the fixed pitch model. | |
| 431 static bool is_good_pitch(float pitch, const TBOX &box1, const TBOX &box2) { | |
| 432 // Character box shouldn't exceed pitch. | |
| 433 if (box1.width() >= pitch * (1.0 + kFPTolerance) || | |
| 434 box2.width() >= pitch * (1.0 + kFPTolerance) || | |
| 435 box1.height() >= pitch * (1.0 + kFPTolerance) || | |
| 436 box2.height() >= pitch * (1.0 + kFPTolerance)) { | |
| 437 return false; | |
| 438 } | |
| 439 | |
| 440 const float real_pitch = box_pitch(box1, box2); | |
| 441 if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) { | |
| 442 return true; | |
| 443 } | |
| 444 | |
| 445 if (textord_space_size_is_variable) { | |
| 446 // Hangul characters usually have fixed pitch, but words are | |
| 447 // delimited by space which can be narrower than characters. | |
| 448 if (real_pitch > pitch && real_pitch < pitch * 2.0 && real_pitch - box1.x_gap(box2) < pitch) { | |
| 449 return true; | |
| 450 } | |
| 451 } | |
| 452 return false; | |
| 453 } | |
| 454 | |
| 455 static bool is_interesting_blob(const BLOBNBOX *blob) { | |
| 456 return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER; | |
| 457 } | |
| 458 | |
| 459 // Cleanup chars that are already merged to others. | |
| 460 void DeleteChars() { | |
| 461 unsigned index = 0; | |
| 462 for (unsigned i = 0; i < characters_.size(); ++i) { | |
| 463 if (!characters_[i].delete_flag()) { | |
| 464 if (index != i) { | |
| 465 characters_[index] = characters_[i]; | |
| 466 } | |
| 467 index++; | |
| 468 } | |
| 469 } | |
| 470 characters_.resize(index); | |
| 471 } | |
| 472 | |
| 473 float pitch_ = 0.0f; // Character pitch. | |
| 474 float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered | |
| 475 // to be good enough. | |
| 476 float height_ = 0.0f; // Character height. | |
| 477 float gap_ = 0.0f; // Minimum gap between characters. | |
| 478 | |
| 479 // Pitches between any two successive characters. | |
| 480 SimpleStats all_pitches_; | |
| 481 // Gaps between any two successive characters. | |
| 482 SimpleStats all_gaps_; | |
| 483 // Pitches between any two successive characters that are consistent | |
| 484 // with the fixed pitch model. | |
| 485 SimpleStats good_pitches_; | |
| 486 // Gaps between any two successive characters that are consistent | |
| 487 // with the fixed pitch model. | |
| 488 SimpleStats good_gaps_; | |
| 489 | |
| 490 SimpleStats heights_; | |
| 491 | |
| 492 std::vector<FPChar> characters_; | |
| 493 TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row. | |
| 494 }; | |
| 495 | |
| 496 void FPRow::Init(TO_ROW *row) { | |
| 497 ASSERT_HOST(row != nullptr); | |
| 498 ASSERT_HOST(row->xheight > 0); | |
| 499 real_row_ = row; | |
| 500 real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision. | |
| 501 | |
| 502 BLOBNBOX_IT blob_it = row->blob_list(); | |
| 503 // Initialize characters_ and compute the initial estimation of | |
| 504 // character height. | |
| 505 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 506 if (is_interesting_blob(blob_it.data())) { | |
| 507 FPChar fp_char; | |
| 508 fp_char.Init(blob_it.data()); | |
| 509 // Merge unconditionally if two blobs overlap. | |
| 510 if (!characters_.empty() && significant_overlap(fp_char.box(), characters_.back().box())) { | |
| 511 characters_.back().Merge(fp_char); | |
| 512 } else { | |
| 513 characters_.push_back(fp_char); | |
| 514 } | |
| 515 TBOX bound = blob_it.data()->bounding_box(); | |
| 516 if (bound.height() * 3.0 > bound.width()) { | |
| 517 heights_.Add(bound.height()); | |
| 518 } | |
| 519 } | |
| 520 } | |
| 521 heights_.Finish(); | |
| 522 height_ = heights_.ile(0.875); | |
| 523 } | |
| 524 | |
| 525 void FPRow::OutputEstimations() { | |
| 526 if (good_pitches_.empty()) { | |
| 527 pitch_ = 0.0f; | |
| 528 real_row_->pitch_decision = PITCH_CORR_PROP; | |
| 529 return; | |
| 530 } | |
| 531 | |
| 532 pitch_ = good_pitches_.median(); | |
| 533 real_row_->fixed_pitch = pitch_; | |
| 534 // good_gaps_.ile(0.125) can be large if most characters on the row | |
| 535 // are skinny. Use pitch_ - height_ instead if it's smaller, but | |
| 536 // positive. | |
| 537 real_row_->kern_size = real_row_->pr_nonsp = | |
| 538 std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f)); | |
| 539 real_row_->body_size = pitch_ - real_row_->kern_size; | |
| 540 | |
| 541 if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) { | |
| 542 // If more than half of the characters of a line don't fit to the | |
| 543 // fixed pitch model, consider the line to be proportional. 50% | |
| 544 // seems to be a good threshold in practice as well. | |
| 545 // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in | |
| 546 // real_row_ as a partial estimation result and try to use them in the | |
| 547 // normalization process. | |
| 548 real_row_->pitch_decision = PITCH_CORR_PROP; | |
| 549 return; | |
| 550 } else if (good_pitches_.size() > all_pitches_.size() * 0.75) { | |
| 551 real_row_->pitch_decision = PITCH_DEF_FIXED; | |
| 552 } else { | |
| 553 real_row_->pitch_decision = PITCH_CORR_FIXED; | |
| 554 } | |
| 555 | |
| 556 real_row_->space_size = real_row_->pr_space = pitch_; | |
| 557 // Set min_space to 50% of character pitch so that we can break CJK | |
| 558 // text at a half-width space after punctuation. | |
| 559 real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5; | |
| 560 | |
| 561 // Don't consider a quarter space as a real space, because it's used | |
| 562 // for line justification in traditional Japanese books. | |
| 563 real_row_->max_nonspace = | |
| 564 std::max(pitch_ * 0.25 + good_gaps_.minimum(), static_cast<double>(good_gaps_.ile(0.875))); | |
| 565 | |
| 566 int space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2, | |
| 567 static_cast<int>(real_row_->xheight)); | |
| 568 | |
| 569 // Make max_nonspace larger than any intra-character gap so that | |
| 570 // make_prop_words() won't break a row at the middle of a character. | |
| 571 for (size_t i = 0; i < num_chars(); ++i) { | |
| 572 if (characters_[i].max_gap() > real_row_->max_nonspace) { | |
| 573 real_row_->max_nonspace = characters_[i].max_gap(); | |
| 574 } | |
| 575 } | |
| 576 real_row_->space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2, | |
| 577 static_cast<int>(real_row_->xheight)); | |
| 578 real_row_->used_dm_model = false; | |
| 579 | |
| 580 // Setup char_cells. | |
| 581 ICOORDELT_IT cell_it = &real_row_->char_cells; | |
| 582 auto *cell = new ICOORDELT(real_body(0).left(), 0); | |
| 583 cell_it.add_after_then_move(cell); | |
| 584 | |
| 585 int right = real_body(0).right(); | |
| 586 for (size_t i = 1; i < num_chars(); ++i) { | |
| 587 // Put a word break if gap between two characters is bigger than | |
| 588 // space_threshold. Don't break if none of two characters | |
| 589 // couldn't be "finalized", because maybe they need to be merged | |
| 590 // to one character. | |
| 591 if ((is_final(i - 1) || is_final(i)) && | |
| 592 real_body(i - 1).x_gap(real_body(i)) > space_threshold) { | |
| 593 cell = new ICOORDELT(right + 1, 0); | |
| 594 cell_it.add_after_then_move(cell); | |
| 595 while (right + pitch_ < box(i).left()) { | |
| 596 right += pitch_; | |
| 597 cell = new ICOORDELT(right + 1, 0); | |
| 598 cell_it.add_after_then_move(cell); | |
| 599 } | |
| 600 right = box(i).left(); | |
| 601 } | |
| 602 cell = new ICOORDELT((right + real_body(i).left()) / 2, 0); | |
| 603 cell_it.add_after_then_move(cell); | |
| 604 right = real_body(i).right(); | |
| 605 } | |
| 606 | |
| 607 cell = new ICOORDELT(right + 1, 0); | |
| 608 cell_it.add_after_then_move(cell); | |
| 609 | |
| 610 // TODO(takenaka): add code to store alignment/fragmentation | |
| 611 // information to blobs so that it can be reused later, e.g. in | |
| 612 // recognition phase. | |
| 613 } | |
| 614 | |
| 615 void FPRow::EstimatePitch(bool pass1) { | |
| 616 good_pitches_.Clear(); | |
| 617 all_pitches_.Clear(); | |
| 618 good_gaps_.Clear(); | |
| 619 all_gaps_.Clear(); | |
| 620 heights_.Clear(); | |
| 621 if (num_chars() == 0) { | |
| 622 return; | |
| 623 } | |
| 624 | |
| 625 int32_t cx0, cx1; | |
| 626 bool prev_was_good = is_good(0); | |
| 627 cx0 = center_x(0); | |
| 628 | |
| 629 heights_.Add(box(0).height()); | |
| 630 for (size_t i = 1; i < num_chars(); i++) { | |
| 631 cx1 = center_x(i); | |
| 632 int32_t pitch = cx1 - cx0; | |
| 633 int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i))); | |
| 634 | |
| 635 heights_.Add(box(i).height()); | |
| 636 // Ignore if the pitch is too close. But don't ignore wide pitch | |
| 637 // may be the result of large tracking. | |
| 638 if (pitch > height_ * 0.5) { | |
| 639 all_pitches_.Add(pitch); | |
| 640 all_gaps_.Add(gap); | |
| 641 if (is_good(i)) { | |
| 642 // In pass1 (after Pass1Analyze()), all characters marked as | |
| 643 // "good" have a good consistent pitch with their previous | |
| 644 // characters. However, it's not true in pass2 and a good | |
| 645 // character may have a good pitch only between its successor. | |
| 646 // So we collect only pitch values between two good | |
| 647 // characters. and within tolerance in pass2. | |
| 648 if (pass1 || | |
| 649 (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) { | |
| 650 good_pitches_.Add(pitch); | |
| 651 if (!is_box_modified(i - 1) && !is_box_modified(i)) { | |
| 652 good_gaps_.Add(gap); | |
| 653 } | |
| 654 } | |
| 655 prev_was_good = true; | |
| 656 } else { | |
| 657 prev_was_good = false; | |
| 658 } | |
| 659 } | |
| 660 cx0 = cx1; | |
| 661 } | |
| 662 | |
| 663 good_pitches_.Finish(); | |
| 664 all_pitches_.Finish(); | |
| 665 good_gaps_.Finish(); | |
| 666 all_gaps_.Finish(); | |
| 667 heights_.Finish(); | |
| 668 | |
| 669 height_ = heights_.ile(0.875); | |
| 670 if (all_pitches_.empty()) { | |
| 671 pitch_ = 0.0f; | |
| 672 gap_ = 0.0f; | |
| 673 } else if (good_pitches_.size() < 2) { | |
| 674 // We don't have enough data to estimate the pitch of this row yet. | |
| 675 // Use median of all pitches as the initial guess. | |
| 676 pitch_ = all_pitches_.median(); | |
| 677 ASSERT_HOST(pitch_ > 0.0f); | |
| 678 gap_ = all_gaps_.ile(0.125); | |
| 679 } else { | |
| 680 pitch_ = good_pitches_.median(); | |
| 681 ASSERT_HOST(pitch_ > 0.0f); | |
| 682 gap_ = good_gaps_.ile(0.125); | |
| 683 } | |
| 684 } | |
| 685 | |
| 686 void FPRow::DebugOutputResult(int row_index) { | |
| 687 if (num_chars() > 0) { | |
| 688 tprintf( | |
| 689 "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " | |
| 690 "space_size=%f, space_threshold=%d, xheight=%f\n", | |
| 691 row_index, static_cast<int>(real_row_->pitch_decision), real_row_->fixed_pitch, | |
| 692 real_row_->max_nonspace, real_row_->space_size, real_row_->space_threshold, | |
| 693 real_row_->xheight); | |
| 694 | |
| 695 for (unsigned i = 0; i < num_chars(); i++) { | |
| 696 tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), is_good(i), | |
| 697 character(i)->num_blobs()); | |
| 698 box(i).print(); | |
| 699 } | |
| 700 } | |
| 701 } | |
| 702 | |
| 703 void FPRow::Pass1Analyze() { | |
| 704 if (num_chars() < 2) { | |
| 705 return; | |
| 706 } | |
| 707 | |
| 708 if (estimated_pitch_ > 0.0f) { | |
| 709 for (size_t i = 2; i < num_chars(); i++) { | |
| 710 if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) && | |
| 711 is_good_pitch(estimated_pitch_, box(i - 1), box(i))) { | |
| 712 mark_good(i - 1); | |
| 713 } | |
| 714 } | |
| 715 } else { | |
| 716 for (size_t i = 2; i < num_chars(); i++) { | |
| 717 if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i))) { | |
| 718 mark_good(i - 1); | |
| 719 } | |
| 720 } | |
| 721 } | |
| 722 character(0)->set_alignment(character(1)->alignment()); | |
| 723 character(num_chars() - 1)->set_alignment(character(num_chars() - 2)->alignment()); | |
| 724 } | |
| 725 | |
| 726 bool FPRow::Pass2Analyze() { | |
| 727 bool changed = false; | |
| 728 if (num_chars() <= 1 || estimated_pitch_ == 0.0f) { | |
| 729 return false; | |
| 730 } | |
| 731 for (size_t i = 0; i < num_chars(); i++) { | |
| 732 if (is_final(i)) { | |
| 733 continue; | |
| 734 } | |
| 735 | |
| 736 FPChar::Alignment alignment = character(i)->alignment(); | |
| 737 bool intersecting = false; | |
| 738 bool not_intersecting = false; | |
| 739 | |
| 740 if (i < num_chars() - 1 && is_final(i + 1)) { | |
| 741 // Next character is already finalized. Estimate the imaginary | |
| 742 // body including this character based on the character. Skip | |
| 743 // whitespace if necessary. | |
| 744 bool skipped_whitespaces = false; | |
| 745 float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; | |
| 746 while (c1 > box(i).right()) { | |
| 747 skipped_whitespaces = true; | |
| 748 c1 -= estimated_pitch_; | |
| 749 } | |
| 750 TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top()); | |
| 751 | |
| 752 // Collect all characters that mostly fit in the region. | |
| 753 // Also, their union height shouldn't be too big. | |
| 754 int j = i; | |
| 755 TBOX merged; | |
| 756 while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) && | |
| 757 merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) { | |
| 758 merged += box(j); | |
| 759 j--; | |
| 760 } | |
| 761 | |
| 762 if (j >= 0 && significant_overlap(ibody, box(j))) { | |
| 763 // character(j) lies on the character boundary and doesn't fit | |
| 764 // well into the imaginary body. | |
| 765 if (!is_final(j)) { | |
| 766 intersecting = true; | |
| 767 } | |
| 768 } else { | |
| 769 not_intersecting = true; | |
| 770 if (i - j > 0) { | |
| 771 // Merge character(j+1) ... character(i) because they fit | |
| 772 // into the body nicely. | |
| 773 if (i - j == 1) { | |
| 774 // Only one char in the imaginary body. | |
| 775 if (!skipped_whitespaces) { | |
| 776 mark_good(i); | |
| 777 } | |
| 778 // set ibody as bounding box of this character to get | |
| 779 // better pitch analysis result for halfwidth glyphs | |
| 780 // followed by a halfwidth space. | |
| 781 if (box(i).width() <= estimated_pitch_ * 0.5) { | |
| 782 ibody += box(i); | |
| 783 character(i)->set_box(ibody); | |
| 784 } | |
| 785 character(i)->set_merge_to_prev(false); | |
| 786 finalize(i); | |
| 787 } else { | |
| 788 for (int k = i; k > j + 1; k--) { | |
| 789 character(k)->set_merge_to_prev(true); | |
| 790 } | |
| 791 } | |
| 792 } | |
| 793 } | |
| 794 } | |
| 795 if (i > 0 && is_final(i - 1)) { | |
| 796 // Now we repeat everything from the opposite side. Previous | |
| 797 // character is already finalized. Estimate the imaginary body | |
| 798 // including this character based on the character. | |
| 799 bool skipped_whitespaces = false; | |
| 800 float c1 = center_x(i - 1) + 1.5 * estimated_pitch_; | |
| 801 while (c1 < box(i).left()) { | |
| 802 skipped_whitespaces = true; | |
| 803 c1 += estimated_pitch_; | |
| 804 } | |
| 805 TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top()); | |
| 806 | |
| 807 size_t j = i; | |
| 808 TBOX merged; | |
| 809 while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) && | |
| 810 merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) { | |
| 811 merged += box(j); | |
| 812 j++; | |
| 813 } | |
| 814 | |
| 815 if (j < num_chars() && significant_overlap(ibody, box(j))) { | |
| 816 if (!is_final(j)) { | |
| 817 intersecting = true; | |
| 818 } | |
| 819 } else { | |
| 820 not_intersecting = true; | |
| 821 if (j - i > 0) { | |
| 822 if (j - i == 1) { | |
| 823 if (!skipped_whitespaces) { | |
| 824 mark_good(i); | |
| 825 } | |
| 826 if (box(i).width() <= estimated_pitch_ * 0.5) { | |
| 827 ibody += box(i); | |
| 828 character(i)->set_box(ibody); | |
| 829 } | |
| 830 character(i)->set_merge_to_prev(false); | |
| 831 finalize(i); | |
| 832 } else { | |
| 833 for (size_t k = i + 1; k < j; k++) { | |
| 834 character(k)->set_merge_to_prev(true); | |
| 835 } | |
| 836 } | |
| 837 } | |
| 838 } | |
| 839 } | |
| 840 | |
| 841 // This character doesn't fit well into the estimated imaginary | |
| 842 // bodies. Mark it as bad. | |
| 843 if (intersecting && !not_intersecting) { | |
| 844 mark_bad(i); | |
| 845 } | |
| 846 if (character(i)->alignment() != alignment || character(i)->merge_to_prev()) { | |
| 847 changed = true; | |
| 848 } | |
| 849 } | |
| 850 | |
| 851 return changed; | |
| 852 } | |
| 853 | |
| 854 void FPRow::MergeFragments() { | |
| 855 int last_char = 0; | |
| 856 | |
| 857 for (size_t j = 0; j < num_chars(); ++j) { | |
| 858 if (character(j)->merge_to_prev()) { | |
| 859 character(last_char)->Merge(*character(j)); | |
| 860 character(j)->set_delete_flag(true); | |
| 861 clear_alignment(last_char); | |
| 862 character(j - 1)->set_merge_to_prev(false); | |
| 863 } else { | |
| 864 last_char = j; | |
| 865 } | |
| 866 } | |
| 867 DeleteChars(); | |
| 868 } | |
| 869 | |
| 870 void FPRow::FinalizeLargeChars() { | |
| 871 float row_pitch = estimated_pitch(); | |
| 872 for (size_t i = 0; i < num_chars(); i++) { | |
| 873 if (is_final(i)) { | |
| 874 continue; | |
| 875 } | |
| 876 | |
| 877 // Finalize if both neighbors are finalized. We have no other choice. | |
| 878 if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) { | |
| 879 finalize(i); | |
| 880 continue; | |
| 881 } | |
| 882 | |
| 883 float cx = center_x(i); | |
| 884 TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1); | |
| 885 if (i > 0) { | |
| 886 // The preceding character significantly intersects with the | |
| 887 // imaginary body of this character. Let Pass2Analyze() handle | |
| 888 // this case. | |
| 889 if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) { | |
| 890 continue; | |
| 891 } | |
| 892 if (!is_final(i - 1)) { | |
| 893 TBOX merged = box(i); | |
| 894 merged += box(i - 1); | |
| 895 if (merged.width() < row_pitch) { | |
| 896 continue; | |
| 897 } | |
| 898 // This character cannot be finalized yet because it can be | |
| 899 // merged with the previous one. Again, let Pass2Analyze() | |
| 900 // handle this case. | |
| 901 } | |
| 902 } | |
| 903 if (i < num_chars() - 1) { | |
| 904 if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) { | |
| 905 continue; | |
| 906 } | |
| 907 if (!is_final(i + 1)) { | |
| 908 TBOX merged = box(i); | |
| 909 merged += box(i + 1); | |
| 910 if (merged.width() < row_pitch) { | |
| 911 continue; | |
| 912 } | |
| 913 } | |
| 914 } | |
| 915 finalize(i); | |
| 916 } | |
| 917 | |
| 918 // Update alignment decision. We only consider finalized characters | |
| 919 // in pass2. E.g. if a finalized character C has another finalized | |
| 920 // character L on its left and a not-finalized character R on its | |
| 921 // right, we mark C as good if the pitch between C and L is good, | |
| 922 // regardless of the pitch between C and R. | |
| 923 for (size_t i = 0; i < num_chars(); i++) { | |
| 924 if (!is_final(i)) { | |
| 925 continue; | |
| 926 } | |
| 927 bool good_pitch = false; | |
| 928 bool bad_pitch = false; | |
| 929 if (i > 0 && is_final(i - 1)) { | |
| 930 if (is_good_pitch(row_pitch, box(i - 1), box(i))) { | |
| 931 good_pitch = true; | |
| 932 } else { | |
| 933 bad_pitch = true; | |
| 934 } | |
| 935 } | |
| 936 if (i < num_chars() - 1 && is_final(i + 1)) { | |
| 937 if (is_good_pitch(row_pitch, box(i), box(i + 1))) { | |
| 938 good_pitch = true; | |
| 939 } else { | |
| 940 bad_pitch = true; | |
| 941 } | |
| 942 } | |
| 943 if (good_pitch && !bad_pitch) { | |
| 944 mark_good(i); | |
| 945 } else if (!good_pitch && bad_pitch) { | |
| 946 mark_bad(i); | |
| 947 } | |
| 948 } | |
| 949 } | |
| 950 | |
| 951 class FPAnalyzer { | |
| 952 public: | |
| 953 FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks); | |
| 954 ~FPAnalyzer() = default; | |
| 955 | |
| 956 void Pass1Analyze() { | |
| 957 for (auto &row : rows_) { | |
| 958 row.Pass1Analyze(); | |
| 959 } | |
| 960 } | |
| 961 | |
| 962 // Estimate character pitch for each row. The argument pass1 can be | |
| 963 // set to true if the function is called after Pass1Analyze(), to | |
| 964 // eliminate some redundant computation. | |
| 965 void EstimatePitch(bool pass1); | |
| 966 | |
| 967 bool maybe_fixed_pitch() { | |
| 968 if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) { | |
| 969 return false; | |
| 970 } | |
| 971 return true; | |
| 972 } | |
| 973 | |
| 974 void MergeFragments() { | |
| 975 for (auto &row : rows_) { | |
| 976 row.MergeFragments(); | |
| 977 } | |
| 978 } | |
| 979 | |
| 980 void FinalizeLargeChars() { | |
| 981 for (auto &row : rows_) { | |
| 982 row.FinalizeLargeChars(); | |
| 983 } | |
| 984 } | |
| 985 | |
| 986 bool Pass2Analyze() { | |
| 987 bool changed = false; | |
| 988 for (auto &row : rows_) { | |
| 989 if (row.Pass2Analyze()) { | |
| 990 changed = true; | |
| 991 } | |
| 992 } | |
| 993 return changed; | |
| 994 } | |
| 995 | |
| 996 void OutputEstimations() { | |
| 997 for (auto &row : rows_) { | |
| 998 row.OutputEstimations(); | |
| 999 } | |
| 1000 // Don't we need page-level estimation of gaps/spaces? | |
| 1001 } | |
| 1002 | |
| 1003 void DebugOutputResult() { | |
| 1004 tprintf("FPAnalyzer: final result\n"); | |
| 1005 for (size_t i = 0; i < rows_.size(); i++) { | |
| 1006 rows_[i].DebugOutputResult(i); | |
| 1007 } | |
| 1008 } | |
| 1009 | |
| 1010 size_t num_rows() { | |
| 1011 return rows_.size(); | |
| 1012 } | |
| 1013 | |
| 1014 // Returns the upper limit for pass2 loop iteration. | |
| 1015 unsigned max_iteration() { | |
| 1016 // We're fixing at least one character per iteration. So basically | |
| 1017 // we shouldn't require more than max_chars_per_row_ iterations. | |
| 1018 return max_chars_per_row_ + 100; | |
| 1019 } | |
| 1020 | |
| 1021 private: | |
| 1022 ICOORD page_tr_; | |
| 1023 std::vector<FPRow> rows_; | |
| 1024 unsigned num_tall_rows_; | |
| 1025 unsigned num_bad_rows_; | |
| 1026 // TODO: num_empty_rows_ is incremented, but never used otherwise. | |
| 1027 unsigned num_empty_rows_; | |
| 1028 unsigned max_chars_per_row_; | |
| 1029 }; | |
| 1030 | |
| 1031 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) | |
| 1032 : page_tr_(page_tr) | |
| 1033 , num_tall_rows_(0) | |
| 1034 , num_bad_rows_(0) | |
| 1035 , num_empty_rows_(0) | |
| 1036 , max_chars_per_row_(0) { | |
| 1037 TO_BLOCK_IT block_it(port_blocks); | |
| 1038 | |
| 1039 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { | |
| 1040 TO_BLOCK *block = block_it.data(); | |
| 1041 if (!block->get_rows()->empty()) { | |
| 1042 ASSERT_HOST(block->xheight > 0); | |
| 1043 find_repeated_chars(block, false); | |
| 1044 } | |
| 1045 } | |
| 1046 | |
| 1047 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { | |
| 1048 TO_ROW_IT row_it = block_it.data()->get_rows(); | |
| 1049 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { | |
| 1050 FPRow row; | |
| 1051 row.Init(row_it.data()); | |
| 1052 rows_.push_back(row); | |
| 1053 size_t num_chars = rows_.back().num_chars(); | |
| 1054 if (num_chars <= 1) { | |
| 1055 num_empty_rows_++; | |
| 1056 } | |
| 1057 if (num_chars > max_chars_per_row_) { | |
| 1058 max_chars_per_row_ = num_chars; | |
| 1059 } | |
| 1060 } | |
| 1061 } | |
| 1062 } | |
| 1063 | |
| 1064 void FPAnalyzer::EstimatePitch(bool pass1) { | |
| 1065 LocalCorrelation pitch_height_stats; | |
| 1066 | |
| 1067 num_tall_rows_ = 0; | |
| 1068 num_bad_rows_ = 0; | |
| 1069 pitch_height_stats.Clear(); | |
| 1070 for (auto &row : rows_) { | |
| 1071 row.EstimatePitch(pass1); | |
| 1072 if (row.good_pitches()) { | |
| 1073 pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches()); | |
| 1074 if (row.height_pitch_ratio() > 1.1) { | |
| 1075 num_tall_rows_++; | |
| 1076 } | |
| 1077 } else { | |
| 1078 num_bad_rows_++; | |
| 1079 } | |
| 1080 } | |
| 1081 | |
| 1082 pitch_height_stats.Finish(); | |
| 1083 for (auto &row : rows_) { | |
| 1084 if (row.good_pitches() >= 5) { | |
| 1085 // We have enough evidences. Just use the pitch estimation | |
| 1086 // from this row. | |
| 1087 row.set_estimated_pitch(row.pitch()); | |
| 1088 } else if (row.num_chars() > 1) { | |
| 1089 float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f); | |
| 1090 // CJK characters are more likely to be fragmented than poorly | |
| 1091 // chopped. So trust the page-level estimation of character | |
| 1092 // pitch only if it's larger than row-level estimation or | |
| 1093 // row-level estimation is too large (2x bigger than row height). | |
| 1094 if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) { | |
| 1095 row.set_estimated_pitch(estimated_pitch); | |
| 1096 } else { | |
| 1097 row.set_estimated_pitch(row.pitch()); | |
| 1098 } | |
| 1099 } | |
| 1100 } | |
| 1101 } | |
| 1102 | |
| 1103 void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { | |
| 1104 FPAnalyzer analyzer(page_tr, port_blocks); | |
| 1105 if (analyzer.num_rows() == 0) { | |
| 1106 return; | |
| 1107 } | |
| 1108 | |
| 1109 analyzer.Pass1Analyze(); | |
| 1110 analyzer.EstimatePitch(true); | |
| 1111 | |
| 1112 // Perform pass1 analysis again with the initial estimation of row | |
| 1113 // pitches, for better estimation. | |
| 1114 analyzer.Pass1Analyze(); | |
| 1115 analyzer.EstimatePitch(true); | |
| 1116 | |
| 1117 // Early exit if the page doesn't seem to contain fixed pitch rows. | |
| 1118 if (!analyzer.maybe_fixed_pitch()) { | |
| 1119 if (textord_debug_pitch_test) { | |
| 1120 tprintf("Page doesn't seem to contain fixed pitch rows\n"); | |
| 1121 } | |
| 1122 return; | |
| 1123 } | |
| 1124 | |
| 1125 unsigned iteration = 0; | |
| 1126 do { | |
| 1127 analyzer.MergeFragments(); | |
| 1128 analyzer.FinalizeLargeChars(); | |
| 1129 analyzer.EstimatePitch(false); | |
| 1130 iteration++; | |
| 1131 } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration()); | |
| 1132 | |
| 1133 if (textord_debug_pitch_test) { | |
| 1134 tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration, | |
| 1135 analyzer.max_iteration()); | |
| 1136 } | |
| 1137 | |
| 1138 analyzer.OutputEstimations(); | |
| 1139 if (textord_debug_pitch_test) { | |
| 1140 analyzer.DebugOutputResult(); | |
| 1141 } | |
| 1142 } | |
| 1143 | |
| 1144 } // namespace tesseract |
