Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/pithsync.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: pithsync.cpp (Formerly pitsync2.c) | |
| 3 * Description: Code to find the optimum fixed pitch segmentation of some blobs. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1992, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #include "pithsync.h" | |
| 20 | |
| 21 #include "makerow.h" | |
| 22 #include "pitsync1.h" | |
| 23 #include "topitch.h" | |
| 24 #include "tprintf.h" | |
| 25 | |
| 26 #include <cfloat> // for FLT_MAX | |
| 27 #include <cmath> | |
| 28 #include <vector> // for std::vector | |
| 29 | |
| 30 namespace tesseract { | |
| 31 | |
| 32 /********************************************************************** | |
| 33 * FPCUTPT::setup | |
| 34 * | |
| 35 * Constructor to make a new FPCUTPT. | |
| 36 **********************************************************************/ | |
| 37 | |
| 38 void FPCUTPT::setup( // constructor | |
| 39 FPCUTPT *cutpts, // predecessors | |
| 40 int16_t array_origin, // start coord | |
| 41 STATS *projection, // vertical occupation | |
| 42 int16_t zero_count, // official zero | |
| 43 int16_t pitch, // proposed pitch | |
| 44 int16_t x, // position | |
| 45 int16_t offset // dist to gap | |
| 46 ) { | |
| 47 // half of pitch | |
| 48 int16_t half_pitch = pitch / 2 - 1; | |
| 49 uint32_t lead_flag; // new flag | |
| 50 int32_t ind; // current position | |
| 51 | |
| 52 if (half_pitch > 31) { | |
| 53 half_pitch = 31; | |
| 54 } else if (half_pitch < 0) { | |
| 55 half_pitch = 0; | |
| 56 } | |
| 57 lead_flag = 1 << half_pitch; | |
| 58 | |
| 59 pred = nullptr; | |
| 60 mean_sum = 0; | |
| 61 sq_sum = offset * offset; | |
| 62 cost = sq_sum; | |
| 63 faked = false; | |
| 64 terminal = false; | |
| 65 fake_count = 0; | |
| 66 xpos = x; | |
| 67 region_index = 0; | |
| 68 mid_cuts = 0; | |
| 69 if (x == array_origin) { | |
| 70 back_balance = 0; | |
| 71 fwd_balance = 0; | |
| 72 for (ind = 0; ind <= half_pitch; ind++) { | |
| 73 fwd_balance >>= 1; | |
| 74 if (projection->pile_count(ind) > zero_count) { | |
| 75 fwd_balance |= lead_flag; | |
| 76 } | |
| 77 } | |
| 78 } else { | |
| 79 back_balance = cutpts[x - 1 - array_origin].back_balance << 1; | |
| 80 back_balance &= lead_flag + (lead_flag - 1); | |
| 81 if (projection->pile_count(x) > zero_count) { | |
| 82 back_balance |= 1; | |
| 83 } | |
| 84 fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; | |
| 85 if (projection->pile_count(x + half_pitch) > zero_count) { | |
| 86 fwd_balance |= lead_flag; | |
| 87 } | |
| 88 } | |
| 89 } | |
| 90 | |
| 91 /********************************************************************** | |
| 92 * FPCUTPT::assign | |
| 93 * | |
| 94 * Constructor to make a new FPCUTPT. | |
| 95 **********************************************************************/ | |
| 96 | |
| 97 void FPCUTPT::assign( // constructor | |
| 98 FPCUTPT *cutpts, // predecessors | |
| 99 int16_t array_origin, // start coord | |
| 100 int16_t x, // position | |
| 101 bool faking, // faking this one | |
| 102 bool mid_cut, // cheap cut. | |
| 103 int16_t offset, // dist to gap | |
| 104 STATS *projection, // vertical occupation | |
| 105 float projection_scale, // scaling | |
| 106 int16_t zero_count, // official zero | |
| 107 int16_t pitch, // proposed pitch | |
| 108 int16_t pitch_error // allowed tolerance | |
| 109 ) { | |
| 110 int index; // test index | |
| 111 int balance_index; // for balance factor | |
| 112 int16_t balance_count; // ding factor | |
| 113 int16_t r_index; // test cut number | |
| 114 FPCUTPT *segpt; // segment point | |
| 115 int32_t dist; // from prev segment | |
| 116 double sq_dist; // squared distance | |
| 117 double mean; // mean pitch | |
| 118 double total; // total dists | |
| 119 double factor; // cost function | |
| 120 // half of pitch | |
| 121 int16_t half_pitch = pitch / 2 - 1; | |
| 122 uint32_t lead_flag; // new flag | |
| 123 | |
| 124 if (half_pitch > 31) { | |
| 125 half_pitch = 31; | |
| 126 } else if (half_pitch < 0) { | |
| 127 half_pitch = 0; | |
| 128 } | |
| 129 lead_flag = 1 << half_pitch; | |
| 130 | |
| 131 back_balance = cutpts[x - 1 - array_origin].back_balance << 1; | |
| 132 back_balance &= lead_flag + (lead_flag - 1); | |
| 133 if (projection->pile_count(x) > zero_count) { | |
| 134 back_balance |= 1; | |
| 135 } | |
| 136 fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; | |
| 137 if (projection->pile_count(x + half_pitch) > zero_count) { | |
| 138 fwd_balance |= lead_flag; | |
| 139 } | |
| 140 | |
| 141 xpos = x; | |
| 142 cost = FLT_MAX; | |
| 143 pred = nullptr; | |
| 144 faked = faking; | |
| 145 terminal = false; | |
| 146 region_index = 0; | |
| 147 fake_count = INT16_MAX; | |
| 148 for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error; index++) { | |
| 149 if (index >= array_origin) { | |
| 150 segpt = &cutpts[index - array_origin]; | |
| 151 dist = x - segpt->xpos; | |
| 152 if (!segpt->terminal && segpt->fake_count < INT16_MAX) { | |
| 153 balance_count = 0; | |
| 154 if (textord_balance_factor > 0) { | |
| 155 if (textord_fast_pitch_test) { | |
| 156 lead_flag = back_balance ^ segpt->fwd_balance; | |
| 157 balance_count = 0; | |
| 158 while (lead_flag != 0) { | |
| 159 balance_count++; | |
| 160 lead_flag &= lead_flag - 1; | |
| 161 } | |
| 162 } else { | |
| 163 for (balance_index = 0; index + balance_index < x - balance_index; balance_index++) { | |
| 164 balance_count += (projection->pile_count(index + balance_index) <= zero_count) ^ | |
| 165 (projection->pile_count(x - balance_index) <= zero_count); | |
| 166 } | |
| 167 } | |
| 168 balance_count = | |
| 169 static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale); | |
| 170 } | |
| 171 r_index = segpt->region_index + 1; | |
| 172 total = segpt->mean_sum + dist; | |
| 173 balance_count += offset; | |
| 174 sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count; | |
| 175 mean = total / r_index; | |
| 176 factor = mean - pitch; | |
| 177 factor *= factor; | |
| 178 factor += sq_dist / (r_index)-mean * mean; | |
| 179 if (factor < cost && segpt->fake_count + faked <= fake_count) { | |
| 180 cost = factor; // find least cost | |
| 181 pred = segpt; // save path | |
| 182 mean_sum = total; | |
| 183 sq_sum = sq_dist; | |
| 184 fake_count = segpt->fake_count + faked; | |
| 185 mid_cuts = segpt->mid_cuts + mid_cut; | |
| 186 region_index = r_index; | |
| 187 } | |
| 188 } | |
| 189 } | |
| 190 } | |
| 191 } | |
| 192 | |
| 193 /********************************************************************** | |
| 194 * FPCUTPT::assign_cheap | |
| 195 * | |
| 196 * Constructor to make a new FPCUTPT on the cheap. | |
| 197 **********************************************************************/ | |
| 198 | |
| 199 void FPCUTPT::assign_cheap( // constructor | |
| 200 FPCUTPT *cutpts, // predecessors | |
| 201 int16_t array_origin, // start coord | |
| 202 int16_t x, // position | |
| 203 bool faking, // faking this one | |
| 204 bool mid_cut, // cheap cut. | |
| 205 int16_t offset, // dist to gap | |
| 206 STATS *projection, // vertical occupation | |
| 207 float projection_scale, // scaling | |
| 208 int16_t zero_count, // official zero | |
| 209 int16_t pitch, // proposed pitch | |
| 210 int16_t pitch_error // allowed tolerance | |
| 211 ) { | |
| 212 int index; // test index | |
| 213 int16_t balance_count; // ding factor | |
| 214 int16_t r_index; // test cut number | |
| 215 FPCUTPT *segpt; // segment point | |
| 216 int32_t dist; // from prev segment | |
| 217 double sq_dist; // squared distance | |
| 218 double mean; // mean pitch | |
| 219 double total; // total dists | |
| 220 double factor; // cost function | |
| 221 // half of pitch | |
| 222 int16_t half_pitch = pitch / 2 - 1; | |
| 223 uint32_t lead_flag; // new flag | |
| 224 | |
| 225 if (half_pitch > 31) { | |
| 226 half_pitch = 31; | |
| 227 } else if (half_pitch < 0) { | |
| 228 half_pitch = 0; | |
| 229 } | |
| 230 lead_flag = 1 << half_pitch; | |
| 231 | |
| 232 back_balance = cutpts[x - 1 - array_origin].back_balance << 1; | |
| 233 back_balance &= lead_flag + (lead_flag - 1); | |
| 234 if (projection->pile_count(x) > zero_count) { | |
| 235 back_balance |= 1; | |
| 236 } | |
| 237 fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; | |
| 238 if (projection->pile_count(x + half_pitch) > zero_count) { | |
| 239 fwd_balance |= lead_flag; | |
| 240 } | |
| 241 | |
| 242 xpos = x; | |
| 243 cost = FLT_MAX; | |
| 244 pred = nullptr; | |
| 245 faked = faking; | |
| 246 terminal = false; | |
| 247 region_index = 0; | |
| 248 fake_count = INT16_MAX; | |
| 249 index = x - pitch; | |
| 250 if (index >= array_origin) { | |
| 251 segpt = &cutpts[index - array_origin]; | |
| 252 dist = x - segpt->xpos; | |
| 253 if (!segpt->terminal && segpt->fake_count < INT16_MAX) { | |
| 254 balance_count = 0; | |
| 255 if (textord_balance_factor > 0) { | |
| 256 lead_flag = back_balance ^ segpt->fwd_balance; | |
| 257 balance_count = 0; | |
| 258 while (lead_flag != 0) { | |
| 259 balance_count++; | |
| 260 lead_flag &= lead_flag - 1; | |
| 261 } | |
| 262 balance_count = | |
| 263 static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale); | |
| 264 } | |
| 265 r_index = segpt->region_index + 1; | |
| 266 total = segpt->mean_sum + dist; | |
| 267 balance_count += offset; | |
| 268 sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count; | |
| 269 mean = total / r_index; | |
| 270 factor = mean - pitch; | |
| 271 factor *= factor; | |
| 272 factor += sq_dist / (r_index)-mean * mean; | |
| 273 cost = factor; // find least cost | |
| 274 pred = segpt; // save path | |
| 275 mean_sum = total; | |
| 276 sq_sum = sq_dist; | |
| 277 fake_count = segpt->fake_count + faked; | |
| 278 mid_cuts = segpt->mid_cuts + mid_cut; | |
| 279 region_index = r_index; | |
| 280 } | |
| 281 } | |
| 282 } | |
| 283 | |
| 284 /********************************************************************** | |
| 285 * check_pitch_sync | |
| 286 * | |
| 287 * Construct the lattice of possible segmentation points and choose the | |
| 288 * optimal path. Return the optimal path only. | |
| 289 * The return value is a measure of goodness of the sync. | |
| 290 **********************************************************************/ | |
| 291 | |
| 292 double check_pitch_sync2( // find segmentation | |
| 293 BLOBNBOX_IT *blob_it, // blobs to do | |
| 294 int16_t blob_count, // no of blobs | |
| 295 int16_t pitch, // pitch estimate | |
| 296 int16_t pitch_error, // tolerance | |
| 297 STATS *projection, // vertical | |
| 298 int16_t projection_left, // edges //scale factor | |
| 299 int16_t projection_right, float projection_scale, | |
| 300 int16_t &occupation_count, // no of occupied cells | |
| 301 FPSEGPT_LIST *seg_list, // output list | |
| 302 int16_t start, // start of good range | |
| 303 int16_t end // end of good range | |
| 304 ) { | |
| 305 bool faking; // illegal cut pt | |
| 306 bool mid_cut; // cheap cut pt. | |
| 307 int16_t x; // current coord | |
| 308 int16_t blob_index; // blob number | |
| 309 int16_t left_edge; // of word | |
| 310 int16_t right_edge; // of word | |
| 311 int16_t array_origin; // x coord of array | |
| 312 int16_t offset; // dist to legal area | |
| 313 int16_t zero_count; // projection zero | |
| 314 int16_t best_left_x = 0; // for equals | |
| 315 int16_t best_right_x = 0; // right edge | |
| 316 TBOX this_box; // bounding box | |
| 317 TBOX next_box; // box of next blob | |
| 318 FPSEGPT *segpt; // segment point | |
| 319 double mean_sum; // computes result | |
| 320 int16_t best_fake; // best fake level | |
| 321 int16_t best_count; // no of cuts | |
| 322 BLOBNBOX_IT this_it; // copy iterator | |
| 323 FPSEGPT_IT seg_it = seg_list; // output iterator | |
| 324 | |
| 325 // tprintf("Computing sync on word of %d blobs with pitch %d\n", | |
| 326 // blob_count, pitch); | |
| 327 // if (blob_count==8 && pitch==27) | |
| 328 // projection->print(stdout,true); | |
| 329 zero_count = 0; | |
| 330 if (pitch < 3) { | |
| 331 pitch = 3; // nothing ludicrous | |
| 332 } | |
| 333 if ((pitch - 3) / 2 < pitch_error) { | |
| 334 pitch_error = (pitch - 3) / 2; | |
| 335 } | |
| 336 this_it = *blob_it; | |
| 337 this_box = box_next(&this_it); // get box | |
| 338 // left_edge=this_box.left(); //left of word right_edge=this_box.right(); | |
| 339 // for (blob_index=1;blob_index<blob_count;blob_index++) | |
| 340 // { | |
| 341 // this_box=box_next(&this_it); | |
| 342 // if (this_box.right()>right_edge) | |
| 343 // right_edge=this_box.right(); | |
| 344 // } | |
| 345 for (left_edge = projection_left; | |
| 346 projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) { | |
| 347 ; | |
| 348 } | |
| 349 for (right_edge = projection_right; | |
| 350 projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) { | |
| 351 ; | |
| 352 } | |
| 353 ASSERT_HOST(right_edge >= left_edge); | |
| 354 if (pitsync_linear_version >= 4) { | |
| 355 return check_pitch_sync3(projection_left, projection_right, zero_count, pitch, pitch_error, | |
| 356 projection, projection_scale, occupation_count, seg_list, start, end); | |
| 357 } | |
| 358 array_origin = left_edge - pitch; | |
| 359 // array of points | |
| 360 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1); | |
| 361 for (x = array_origin; x < left_edge; x++) { | |
| 362 // free cuts | |
| 363 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0); | |
| 364 } | |
| 365 for (offset = 0; offset <= pitch_error; offset++, x++) { | |
| 366 // not quite free | |
| 367 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, | |
| 368 offset); | |
| 369 } | |
| 370 | |
| 371 this_it = *blob_it; | |
| 372 this_box = box_next(&this_it); // first box | |
| 373 next_box = box_next(&this_it); // second box | |
| 374 blob_index = 1; | |
| 375 while (x < right_edge - pitch_error) { | |
| 376 if (x > this_box.right() + pitch_error && blob_index < blob_count) { | |
| 377 this_box = next_box; | |
| 378 next_box = box_next(&this_it); | |
| 379 blob_index++; | |
| 380 } | |
| 381 faking = false; | |
| 382 mid_cut = false; | |
| 383 if (x <= this_box.left()) { | |
| 384 offset = 0; | |
| 385 } else if (x <= this_box.left() + pitch_error) { | |
| 386 offset = x - this_box.left(); | |
| 387 } else if (x >= this_box.right()) { | |
| 388 offset = 0; | |
| 389 } else if (x >= next_box.left() && blob_index < blob_count) { | |
| 390 offset = x - next_box.left(); | |
| 391 if (this_box.right() - x < offset) { | |
| 392 offset = this_box.right() - x; | |
| 393 } | |
| 394 } else if (x >= this_box.right() - pitch_error) { | |
| 395 offset = this_box.right() - x; | |
| 396 } else if (x - this_box.left() > pitch * pitsync_joined_edge && | |
| 397 this_box.right() - x > pitch * pitsync_joined_edge) { | |
| 398 mid_cut = true; | |
| 399 offset = 0; | |
| 400 } else { | |
| 401 faking = true; | |
| 402 offset = projection->pile_count(x); | |
| 403 } | |
| 404 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset, | |
| 405 projection, projection_scale, zero_count, pitch, pitch_error); | |
| 406 x++; | |
| 407 } | |
| 408 | |
| 409 best_fake = INT16_MAX; | |
| 410 // best path | |
| 411 double best_cost = INT32_MAX; | |
| 412 best_count = INT16_MAX; | |
| 413 while (x < right_edge + pitch) { | |
| 414 offset = x < right_edge ? right_edge - x : 0; | |
| 415 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection, | |
| 416 projection_scale, zero_count, pitch, pitch_error); | |
| 417 cutpts[x - array_origin].terminal = true; | |
| 418 if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <= | |
| 419 best_count + best_fake) { | |
| 420 if (cutpts[x - array_origin].fake_count < best_fake || | |
| 421 (cutpts[x - array_origin].fake_count == best_fake && | |
| 422 cutpts[x - array_origin].cost_function() < best_cost)) { | |
| 423 best_fake = cutpts[x - array_origin].fake_count; | |
| 424 best_cost = cutpts[x - array_origin].cost_function(); | |
| 425 best_left_x = x; | |
| 426 best_right_x = x; | |
| 427 best_count = cutpts[x - array_origin].index(); | |
| 428 } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 && | |
| 429 cutpts[x - array_origin].cost_function() == best_cost) { | |
| 430 // exactly equal | |
| 431 best_right_x = x; | |
| 432 } | |
| 433 } | |
| 434 x++; | |
| 435 } | |
| 436 ASSERT_HOST(best_fake < INT16_MAX); | |
| 437 | |
| 438 // end of best path | |
| 439 FPCUTPT *best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; | |
| 440 if (this_box.right() == textord_test_x && this_box.top() == textord_test_y) { | |
| 441 for (x = left_edge - pitch; x < right_edge + pitch; x++) { | |
| 442 tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", x, cutpts[x - array_origin].cost_function(), | |
| 443 cutpts[x - array_origin].sum(), cutpts[x - array_origin].squares(), | |
| 444 cutpts[x - array_origin].previous()->position()); | |
| 445 } | |
| 446 } | |
| 447 occupation_count = -1; | |
| 448 do { | |
| 449 for (x = best_end->position() - pitch + pitch_error; | |
| 450 x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) { | |
| 451 ; | |
| 452 } | |
| 453 if (x < best_end->position() - pitch_error) { | |
| 454 occupation_count++; | |
| 455 } | |
| 456 // copy it | |
| 457 segpt = new FPSEGPT(best_end); | |
| 458 seg_it.add_before_then_move(segpt); | |
| 459 best_end = best_end->previous(); | |
| 460 } while (best_end != nullptr); | |
| 461 seg_it.move_to_last(); | |
| 462 mean_sum = seg_it.data()->sum(); | |
| 463 mean_sum = mean_sum * mean_sum / best_count; | |
| 464 if (seg_it.data()->squares() - mean_sum < 0) { | |
| 465 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(), | |
| 466 seg_it.data()->sum(), best_count); | |
| 467 } | |
| 468 // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n", | |
| 469 // blob_count,pitch,seg_it.data()->squares()-mean_sum, | |
| 470 // occupation_count); | |
| 471 return seg_it.data()->squares() - mean_sum; | |
| 472 } | |
| 473 | |
| 474 /********************************************************************** | |
| 475 * check_pitch_sync | |
| 476 * | |
| 477 * Construct the lattice of possible segmentation points and choose the | |
| 478 * optimal path. Return the optimal path only. | |
| 479 * The return value is a measure of goodness of the sync. | |
| 480 **********************************************************************/ | |
| 481 | |
| 482 double check_pitch_sync3( // find segmentation | |
| 483 int16_t projection_left, // edges //to be considered 0 | |
| 484 int16_t projection_right, int16_t zero_count, | |
| 485 int16_t pitch, // pitch estimate | |
| 486 int16_t pitch_error, // tolerance | |
| 487 STATS *projection, // vertical | |
| 488 float projection_scale, // scale factor | |
| 489 int16_t &occupation_count, // no of occupied cells | |
| 490 FPSEGPT_LIST *seg_list, // output list | |
| 491 int16_t start, // start of good range | |
| 492 int16_t end // end of good range | |
| 493 ) { | |
| 494 bool faking; // illegal cut pt | |
| 495 bool mid_cut; // cheap cut pt. | |
| 496 int16_t left_edge; // of word | |
| 497 int16_t right_edge; // of word | |
| 498 int16_t x; // current coord | |
| 499 int16_t array_origin; // x coord of array | |
| 500 int16_t offset; // dist to legal area | |
| 501 int16_t projection_offset; // from scaled projection | |
| 502 int16_t prev_zero; // previous zero dist | |
| 503 int16_t next_zero; // next zero dist | |
| 504 int16_t zero_offset; // scan window | |
| 505 int16_t best_left_x = 0; // for equals | |
| 506 int16_t best_right_x = 0; // right edge | |
| 507 FPSEGPT *segpt; // segment point | |
| 508 int minindex; // next input position | |
| 509 int test_index; // index to mins | |
| 510 double mean_sum; // computes result | |
| 511 int16_t best_fake; // best fake level | |
| 512 int16_t best_count; // no of cuts | |
| 513 FPSEGPT_IT seg_it = seg_list; // output iterator | |
| 514 | |
| 515 end = (end - start) % pitch; | |
| 516 if (pitch < 3) { | |
| 517 pitch = 3; // nothing ludicrous | |
| 518 } | |
| 519 if ((pitch - 3) / 2 < pitch_error) { | |
| 520 pitch_error = (pitch - 3) / 2; | |
| 521 } | |
| 522 // min dist of zero | |
| 523 zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge); | |
| 524 for (left_edge = projection_left; | |
| 525 projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) { | |
| 526 ; | |
| 527 } | |
| 528 for (right_edge = projection_right; | |
| 529 projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) { | |
| 530 ; | |
| 531 } | |
| 532 array_origin = left_edge - pitch; | |
| 533 // array of points | |
| 534 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1); | |
| 535 // local min results | |
| 536 std::vector<bool> mins(pitch_error * 2 + 1); | |
| 537 for (x = array_origin; x < left_edge; x++) { | |
| 538 // free cuts | |
| 539 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0); | |
| 540 } | |
| 541 prev_zero = left_edge - 1; | |
| 542 for (offset = 0; offset <= pitch_error; offset++, x++) { | |
| 543 // not quite free | |
| 544 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, | |
| 545 offset); | |
| 546 } | |
| 547 | |
| 548 for (offset = -pitch_error, minindex = 0; offset < pitch_error; offset++, minindex++) { | |
| 549 mins[minindex] = projection->local_min(x + offset); | |
| 550 } | |
| 551 next_zero = x + zero_offset + 1; | |
| 552 for (offset = next_zero - 1; offset >= x; offset--) { | |
| 553 if (projection->pile_count(offset) <= zero_count) { | |
| 554 next_zero = offset; | |
| 555 break; | |
| 556 } | |
| 557 } | |
| 558 while (x < right_edge - pitch_error) { | |
| 559 mins[minindex] = projection->local_min(x + pitch_error); | |
| 560 minindex++; | |
| 561 if (minindex > pitch_error * 2) { | |
| 562 minindex = 0; | |
| 563 } | |
| 564 faking = false; | |
| 565 mid_cut = false; | |
| 566 offset = 0; | |
| 567 if (projection->pile_count(x) <= zero_count) { | |
| 568 prev_zero = x; | |
| 569 } else { | |
| 570 for (offset = 1; offset <= pitch_error; offset++) { | |
| 571 if (projection->pile_count(x + offset) <= zero_count || | |
| 572 projection->pile_count(x - offset) <= zero_count) { | |
| 573 break; | |
| 574 } | |
| 575 } | |
| 576 } | |
| 577 if (offset > pitch_error) { | |
| 578 if (x - prev_zero > zero_offset && next_zero - x > zero_offset) { | |
| 579 for (offset = 0; offset <= pitch_error; offset++) { | |
| 580 test_index = minindex + pitch_error + offset; | |
| 581 if (test_index > pitch_error * 2) { | |
| 582 test_index -= pitch_error * 2 + 1; | |
| 583 } | |
| 584 if (mins[test_index]) { | |
| 585 break; | |
| 586 } | |
| 587 test_index = minindex + pitch_error - offset; | |
| 588 if (test_index > pitch_error * 2) { | |
| 589 test_index -= pitch_error * 2 + 1; | |
| 590 } | |
| 591 if (mins[test_index]) { | |
| 592 break; | |
| 593 } | |
| 594 } | |
| 595 } | |
| 596 if (offset > pitch_error) { | |
| 597 offset = projection->pile_count(x); | |
| 598 faking = true; | |
| 599 } else { | |
| 600 projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale); | |
| 601 if (projection_offset > offset) { | |
| 602 offset = projection_offset; | |
| 603 } | |
| 604 mid_cut = true; | |
| 605 } | |
| 606 } | |
| 607 if ((start == 0 && end == 0) || !textord_fast_pitch_test || | |
| 608 (x - projection_left - start) % pitch <= end) { | |
| 609 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset, | |
| 610 projection, projection_scale, zero_count, pitch, pitch_error); | |
| 611 } else { | |
| 612 cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, faking, mid_cut, offset, | |
| 613 projection, projection_scale, zero_count, pitch, | |
| 614 pitch_error); | |
| 615 } | |
| 616 x++; | |
| 617 if (next_zero < x || next_zero == x + zero_offset) { | |
| 618 next_zero = x + zero_offset + 1; | |
| 619 } | |
| 620 if (projection->pile_count(x + zero_offset) <= zero_count) { | |
| 621 next_zero = x + zero_offset; | |
| 622 } | |
| 623 } | |
| 624 | |
| 625 best_fake = INT16_MAX; | |
| 626 // best path | |
| 627 double best_cost = INT32_MAX; | |
| 628 best_count = INT16_MAX; | |
| 629 while (x < right_edge + pitch) { | |
| 630 offset = x < right_edge ? right_edge - x : 0; | |
| 631 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection, | |
| 632 projection_scale, zero_count, pitch, pitch_error); | |
| 633 cutpts[x - array_origin].terminal = true; | |
| 634 if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <= | |
| 635 best_count + best_fake) { | |
| 636 if (cutpts[x - array_origin].fake_count < best_fake || | |
| 637 (cutpts[x - array_origin].fake_count == best_fake && | |
| 638 cutpts[x - array_origin].cost_function() < best_cost)) { | |
| 639 best_fake = cutpts[x - array_origin].fake_count; | |
| 640 best_cost = cutpts[x - array_origin].cost_function(); | |
| 641 best_left_x = x; | |
| 642 best_right_x = x; | |
| 643 best_count = cutpts[x - array_origin].index(); | |
| 644 } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 && | |
| 645 cutpts[x - array_origin].cost_function() == best_cost) { | |
| 646 // exactly equal | |
| 647 best_right_x = x; | |
| 648 } | |
| 649 } | |
| 650 x++; | |
| 651 } | |
| 652 ASSERT_HOST(best_fake < INT16_MAX); | |
| 653 | |
| 654 // end of best path | |
| 655 FPCUTPT *best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; | |
| 656 // for (x=left_edge-pitch;x<right_edge+pitch;x++) | |
| 657 // { | |
| 658 // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", | |
| 659 // x,cutpts[x-array_origin].cost_function(), | |
| 660 // cutpts[x-array_origin].sum(), | |
| 661 // cutpts[x-array_origin].squares(), | |
| 662 // cutpts[x-array_origin].previous()->position()); | |
| 663 // } | |
| 664 occupation_count = -1; | |
| 665 do { | |
| 666 for (x = best_end->position() - pitch + pitch_error; | |
| 667 x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) { | |
| 668 } | |
| 669 if (x < best_end->position() - pitch_error) { | |
| 670 occupation_count++; | |
| 671 } | |
| 672 // copy it | |
| 673 segpt = new FPSEGPT(best_end); | |
| 674 seg_it.add_before_then_move(segpt); | |
| 675 best_end = best_end->previous(); | |
| 676 } while (best_end != nullptr); | |
| 677 seg_it.move_to_last(); | |
| 678 mean_sum = seg_it.data()->sum(); | |
| 679 mean_sum = mean_sum * mean_sum / best_count; | |
| 680 if (seg_it.data()->squares() - mean_sum < 0) { | |
| 681 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(), | |
| 682 seg_it.data()->sum(), best_count); | |
| 683 } | |
| 684 return seg_it.data()->squares() - mean_sum; | |
| 685 } | |
| 686 | |
| 687 } // namespace tesseract |
