Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/fpchop.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: fpchop.cpp (Formerly fp_chop.c) | |
| 3 * Description: Code to chop fixed pitch text into character cells. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1993, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 // Include automatically generated configuration file if running autoconf. | |
| 20 #ifdef HAVE_CONFIG_H | |
| 21 # include "config_auto.h" | |
| 22 #endif | |
| 23 | |
| 24 #include "fpchop.h" | |
| 25 | |
| 26 #include "blobbox.h" | |
| 27 #include "drawtord.h" | |
| 28 #include "statistc.h" | |
| 29 #include "topitch.h" | |
| 30 #include "tovars.h" | |
| 31 | |
| 32 namespace tesseract { | |
| 33 | |
| 34 INT_VAR(textord_fp_chop_error, 2, "Max allowed bending of chop cells"); | |
| 35 | |
| 36 static WERD *add_repeated_word(WERD_IT *rep_it, int16_t &rep_left, int16_t &prev_chop_coord, | |
| 37 uint8_t &blanks, float pitch, WERD_IT *word_it); | |
| 38 | |
| 39 static void fixed_chop_cblob(C_BLOB *blob, int16_t chop_coord, float pitch_error, | |
| 40 C_OUTLINE_LIST *left_outlines, C_OUTLINE_LIST *right_outlines); | |
| 41 | |
| 42 static void fixed_split_coutline(C_OUTLINE *srcline, int16_t chop_coord, float pitch_error, | |
| 43 C_OUTLINE_IT *left_it, C_OUTLINE_IT *right_it); | |
| 44 | |
| 45 static bool fixed_chop_coutline(C_OUTLINE *srcline, int16_t chop_coord, float pitch_error, | |
| 46 C_OUTLINE_FRAG_LIST *left_frags, C_OUTLINE_FRAG_LIST *right_frags); | |
| 47 | |
| 48 static void save_chop_cfragment(int16_t head_index, ICOORD head_pos, int16_t tail_index, | |
| 49 ICOORD tail_pos, C_OUTLINE *srcline, C_OUTLINE_FRAG_LIST *frags); | |
| 50 | |
| 51 static void add_frag_to_list(C_OUTLINE_FRAG *frag, C_OUTLINE_FRAG_LIST *frags); | |
| 52 | |
| 53 static void close_chopped_cfragments(C_OUTLINE_FRAG_LIST *frags, C_OUTLINE_LIST *children, | |
| 54 float pitch_error, C_OUTLINE_IT *dest_it); | |
| 55 | |
| 56 static C_OUTLINE *join_chopped_fragments(C_OUTLINE_FRAG *bottom, C_OUTLINE_FRAG *top); | |
| 57 | |
| 58 static void join_segments(C_OUTLINE_FRAG *bottom, C_OUTLINE_FRAG *top); | |
| 59 | |
| 60 /********************************************************************** | |
| 61 * fixed_pitch_words | |
| 62 * | |
| 63 * Make a ROW from a fixed pitch TO_ROW. | |
| 64 **********************************************************************/ | |
| 65 ROW *fixed_pitch_words( // find lines | |
| 66 TO_ROW *row, // row to do | |
| 67 FCOORD rotation // for drawing | |
| 68 ) { | |
| 69 bool bol; // start of line | |
| 70 uint8_t blanks; // in front of word | |
| 71 uint8_t new_blanks; // blanks in empty cell | |
| 72 int16_t chop_coord; // chop boundary | |
| 73 int16_t prev_chop_coord; // start of cell | |
| 74 int16_t rep_left; // left edge of rep word | |
| 75 ROW *real_row; // output row | |
| 76 C_OUTLINE_LIST left_coutlines; | |
| 77 C_OUTLINE_LIST right_coutlines; | |
| 78 C_BLOB_LIST cblobs; | |
| 79 C_BLOB_IT cblob_it = &cblobs; | |
| 80 WERD_LIST words; | |
| 81 WERD_IT word_it = &words; // new words | |
| 82 // repeated blobs | |
| 83 WERD_IT rep_it = &row->rep_words; | |
| 84 WERD *word; // new word | |
| 85 int32_t xstarts[2]; // row ends | |
| 86 int32_t prev_x; // end of prev blob | |
| 87 // iterator | |
| 88 BLOBNBOX_IT box_it = row->blob_list(); | |
| 89 // boundaries | |
| 90 ICOORDELT_IT cell_it = &row->char_cells; | |
| 91 | |
| 92 #ifndef GRAPHICS_DISABLED | |
| 93 if (textord_show_page_cuts && to_win != nullptr) { | |
| 94 plot_row_cells(to_win, ScrollView::RED, row, 0, &row->char_cells); | |
| 95 } | |
| 96 #endif | |
| 97 | |
| 98 prev_x = -INT16_MAX; | |
| 99 bol = true; | |
| 100 blanks = 0; | |
| 101 if (rep_it.empty()) { | |
| 102 rep_left = INT16_MAX; | |
| 103 } else { | |
| 104 rep_left = rep_it.data()->bounding_box().left(); | |
| 105 } | |
| 106 if (box_it.empty()) { | |
| 107 return nullptr; // empty row | |
| 108 } | |
| 109 xstarts[0] = box_it.data()->bounding_box().left(); | |
| 110 if (rep_left < xstarts[0]) { | |
| 111 xstarts[0] = rep_left; | |
| 112 } | |
| 113 if (cell_it.empty() || row->char_cells.singleton()) { | |
| 114 tprintf("Row without enough char cells!\n"); | |
| 115 tprintf("Leftmost blob is at (%d,%d)\n", box_it.data()->bounding_box().left(), | |
| 116 box_it.data()->bounding_box().bottom()); | |
| 117 return nullptr; | |
| 118 } | |
| 119 ASSERT_HOST(!cell_it.empty() && !row->char_cells.singleton()); | |
| 120 prev_chop_coord = cell_it.data()->x(); | |
| 121 word = nullptr; | |
| 122 while (rep_left < cell_it.data()->x()) { | |
| 123 word = | |
| 124 add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, &word_it); | |
| 125 } | |
| 126 cell_it.mark_cycle_pt(); | |
| 127 if (prev_chop_coord >= cell_it.data()->x()) { | |
| 128 cell_it.forward(); | |
| 129 } | |
| 130 for (; !cell_it.cycled_list(); cell_it.forward()) { | |
| 131 chop_coord = cell_it.data()->x(); | |
| 132 while (!box_it.empty() && box_it.data()->bounding_box().left() <= chop_coord) { | |
| 133 if (box_it.data()->bounding_box().right() > prev_x) { | |
| 134 prev_x = box_it.data()->bounding_box().right(); | |
| 135 } | |
| 136 split_to_blob(box_it.extract(), chop_coord, textord_fp_chop_error + 0.5f, &left_coutlines, | |
| 137 &right_coutlines); | |
| 138 box_it.forward(); | |
| 139 while (!box_it.empty() && box_it.data()->cblob() == nullptr) { | |
| 140 delete box_it.extract(); | |
| 141 box_it.forward(); | |
| 142 } | |
| 143 } | |
| 144 if (!right_coutlines.empty() && left_coutlines.empty()) { | |
| 145 split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5f, &left_coutlines, | |
| 146 &right_coutlines); | |
| 147 } | |
| 148 if (!left_coutlines.empty()) { | |
| 149 cblob_it.add_after_then_move(new C_BLOB(&left_coutlines)); | |
| 150 } else { | |
| 151 if (rep_left < chop_coord) { | |
| 152 if (rep_left > prev_chop_coord) { | |
| 153 new_blanks = | |
| 154 static_cast<uint8_t>(floor((rep_left - prev_chop_coord) / row->fixed_pitch + 0.5)); | |
| 155 } else { | |
| 156 new_blanks = 0; | |
| 157 } | |
| 158 } else { | |
| 159 if (chop_coord > prev_chop_coord) { | |
| 160 new_blanks = | |
| 161 static_cast<uint8_t>(floor((chop_coord - prev_chop_coord) / row->fixed_pitch + 0.5)); | |
| 162 } else { | |
| 163 new_blanks = 0; | |
| 164 } | |
| 165 } | |
| 166 if (!cblob_it.empty()) { | |
| 167 if (blanks < 1 && word != nullptr && !word->flag(W_REP_CHAR)) { | |
| 168 blanks = 1; | |
| 169 } | |
| 170 word = new WERD(&cblobs, blanks, nullptr); | |
| 171 cblob_it.set_to_list(&cblobs); | |
| 172 word->set_flag(W_DONT_CHOP, true); | |
| 173 word_it.add_after_then_move(word); | |
| 174 if (bol) { | |
| 175 word->set_flag(W_BOL, true); | |
| 176 bol = false; | |
| 177 } | |
| 178 blanks = new_blanks; | |
| 179 } else { | |
| 180 blanks += new_blanks; | |
| 181 } | |
| 182 while (rep_left < chop_coord) { | |
| 183 word = add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, | |
| 184 &word_it); | |
| 185 } | |
| 186 } | |
| 187 if (prev_chop_coord < chop_coord) { | |
| 188 prev_chop_coord = chop_coord; | |
| 189 } | |
| 190 } | |
| 191 if (!cblob_it.empty()) { | |
| 192 word = new WERD(&cblobs, blanks, nullptr); | |
| 193 word->set_flag(W_DONT_CHOP, true); | |
| 194 word_it.add_after_then_move(word); | |
| 195 if (bol) { | |
| 196 word->set_flag(W_BOL, true); | |
| 197 } | |
| 198 } | |
| 199 ASSERT_HOST(word != nullptr); | |
| 200 while (!rep_it.empty()) { | |
| 201 add_repeated_word(&rep_it, rep_left, prev_chop_coord, blanks, row->fixed_pitch, &word_it); | |
| 202 } | |
| 203 // at end of line | |
| 204 word_it.data()->set_flag(W_EOL, true); | |
| 205 if (prev_chop_coord > prev_x) { | |
| 206 prev_x = prev_chop_coord; | |
| 207 } | |
| 208 xstarts[1] = prev_x + 1; | |
| 209 real_row = | |
| 210 new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size)); | |
| 211 word_it.set_to_list(real_row->word_list()); | |
| 212 // put words in row | |
| 213 word_it.add_list_after(&words); | |
| 214 real_row->recalc_bounding_box(); | |
| 215 return real_row; | |
| 216 } | |
| 217 | |
| 218 /********************************************************************** | |
| 219 * add_repeated_word | |
| 220 * | |
| 221 * Add repeated word into the row at the given point. | |
| 222 **********************************************************************/ | |
| 223 | |
| 224 static WERD *add_repeated_word( // move repeated word | |
| 225 WERD_IT *rep_it, // repeated words | |
| 226 int16_t &rep_left, // left edge of word | |
| 227 int16_t &prev_chop_coord, // previous word end | |
| 228 uint8_t &blanks, // no of blanks | |
| 229 float pitch, // char cell size | |
| 230 WERD_IT *word_it // list of words | |
| 231 ) { | |
| 232 WERD *word; // word to move | |
| 233 int16_t new_blanks; // extra blanks | |
| 234 | |
| 235 if (rep_left > prev_chop_coord) { | |
| 236 new_blanks = static_cast<uint8_t>(floor((rep_left - prev_chop_coord) / pitch + 0.5)); | |
| 237 blanks += new_blanks; | |
| 238 } | |
| 239 word = rep_it->extract(); | |
| 240 prev_chop_coord = word->bounding_box().right(); | |
| 241 word_it->add_after_then_move(word); | |
| 242 word->set_blanks(blanks); | |
| 243 rep_it->forward(); | |
| 244 if (rep_it->empty()) { | |
| 245 rep_left = INT16_MAX; | |
| 246 } else { | |
| 247 rep_left = rep_it->data()->bounding_box().left(); | |
| 248 } | |
| 249 blanks = 0; | |
| 250 return word; | |
| 251 } | |
| 252 | |
| 253 /********************************************************************** | |
| 254 * split_to_blob | |
| 255 * | |
| 256 * Split a BLOBNBOX across a vertical chop line and put the pieces | |
| 257 * into a left outline list and a right outline list. | |
| 258 **********************************************************************/ | |
| 259 | |
| 260 void split_to_blob( // split the blob | |
| 261 BLOBNBOX *blob, // blob to split | |
| 262 int16_t chop_coord, // place to chop | |
| 263 float pitch_error, // allowed deviation | |
| 264 C_OUTLINE_LIST *left_coutlines, // for cblobs | |
| 265 C_OUTLINE_LIST *right_coutlines) { | |
| 266 C_BLOB *real_cblob; // cblob to chop | |
| 267 | |
| 268 if (blob != nullptr) { | |
| 269 real_cblob = blob->remove_cblob(); | |
| 270 } else { | |
| 271 real_cblob = nullptr; | |
| 272 } | |
| 273 if (!right_coutlines->empty() || real_cblob != nullptr) { | |
| 274 fixed_chop_cblob(real_cblob, chop_coord, pitch_error, left_coutlines, right_coutlines); | |
| 275 } | |
| 276 | |
| 277 delete blob; | |
| 278 } | |
| 279 | |
| 280 /********************************************************************** | |
| 281 * fixed_chop_cblob | |
| 282 * | |
| 283 * Chop the given cblob (if any) and the existing right outlines to | |
| 284 * produce a list of outlines left of the chop point and more to the right. | |
| 285 **********************************************************************/ | |
| 286 | |
| 287 static void fixed_chop_cblob( // split the blob | |
| 288 C_BLOB *blob, // blob to split | |
| 289 int16_t chop_coord, // place to chop | |
| 290 float pitch_error, // allowed deviation | |
| 291 C_OUTLINE_LIST *left_outlines, // left half of chop | |
| 292 C_OUTLINE_LIST *right_outlines // right half of chop | |
| 293 ) { | |
| 294 C_OUTLINE *old_right; // already there | |
| 295 C_OUTLINE_LIST new_outlines; // new right ones | |
| 296 // output iterator | |
| 297 C_OUTLINE_IT left_it = left_outlines; | |
| 298 // in/out iterator | |
| 299 C_OUTLINE_IT right_it = right_outlines; | |
| 300 C_OUTLINE_IT new_it = &new_outlines; | |
| 301 C_OUTLINE_IT blob_it; // outlines in blob | |
| 302 | |
| 303 if (!right_it.empty()) { | |
| 304 while (!right_it.empty()) { | |
| 305 old_right = right_it.extract(); | |
| 306 right_it.forward(); | |
| 307 fixed_split_coutline(old_right, chop_coord, pitch_error, &left_it, &new_it); | |
| 308 } | |
| 309 right_it.add_list_before(&new_outlines); | |
| 310 } | |
| 311 if (blob != nullptr) { | |
| 312 blob_it.set_to_list(blob->out_list()); | |
| 313 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 314 fixed_split_coutline(blob_it.extract(), chop_coord, pitch_error, &left_it, &right_it); | |
| 315 } | |
| 316 delete blob; | |
| 317 } | |
| 318 } | |
| 319 | |
| 320 /********************************************************************** | |
| 321 * fixed_split_outline | |
| 322 * | |
| 323 * Chop the given outline (if necessary) placing the fragments which | |
| 324 * fall either side of the chop line into the appropriate list. | |
| 325 **********************************************************************/ | |
| 326 | |
| 327 static void fixed_split_coutline( // chop the outline | |
| 328 C_OUTLINE *srcline, // source outline | |
| 329 int16_t chop_coord, // place to chop | |
| 330 float pitch_error, // allowed deviation | |
| 331 C_OUTLINE_IT *left_it, // left half of chop | |
| 332 C_OUTLINE_IT *right_it // right half of chop | |
| 333 ) { | |
| 334 C_OUTLINE *child; // child outline | |
| 335 TBOX srcbox; // box of outline | |
| 336 C_OUTLINE_LIST left_ch; // left children | |
| 337 C_OUTLINE_LIST right_ch; // right children | |
| 338 C_OUTLINE_FRAG_LIST left_frags; // chopped fragments | |
| 339 C_OUTLINE_FRAG_LIST right_frags; | |
| 340 ; | |
| 341 C_OUTLINE_IT left_ch_it = &left_ch; | |
| 342 // for whole children | |
| 343 C_OUTLINE_IT right_ch_it = &right_ch; | |
| 344 // for holes | |
| 345 C_OUTLINE_IT child_it = srcline->child(); | |
| 346 | |
| 347 srcbox = srcline->bounding_box(); | |
| 348 if (srcbox.left() + srcbox.right() <= chop_coord * 2 && | |
| 349 srcbox.right() < chop_coord + pitch_error) { | |
| 350 // Whole outline is in the left side or not far over the chop_coord, | |
| 351 // so put the whole thing on the left. | |
| 352 left_it->add_after_then_move(srcline); | |
| 353 } else if (srcbox.left() + srcbox.right() > chop_coord * 2 && | |
| 354 srcbox.left() > chop_coord - pitch_error) { | |
| 355 // Whole outline is in the right side or not far over the chop_coord, | |
| 356 // so put the whole thing on the right. | |
| 357 right_it->add_before_stay_put(srcline); | |
| 358 } else { | |
| 359 // Needs real chopping. | |
| 360 if (fixed_chop_coutline(srcline, chop_coord, pitch_error, &left_frags, &right_frags)) { | |
| 361 for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { | |
| 362 child = child_it.extract(); | |
| 363 srcbox = child->bounding_box(); | |
| 364 if (srcbox.right() < chop_coord) { | |
| 365 // Whole child is on the left. | |
| 366 left_ch_it.add_after_then_move(child); | |
| 367 } else if (srcbox.left() > chop_coord) { | |
| 368 // Whole child is on the right. | |
| 369 right_ch_it.add_after_then_move(child); | |
| 370 } else { | |
| 371 // No pitch_error is allowed when chopping children to prevent | |
| 372 // impossible outlines from being created. | |
| 373 if (fixed_chop_coutline(child, chop_coord, 0.0f, &left_frags, &right_frags)) { | |
| 374 delete child; | |
| 375 } else { | |
| 376 if (srcbox.left() + srcbox.right() <= chop_coord * 2) { | |
| 377 left_ch_it.add_after_then_move(child); | |
| 378 } else { | |
| 379 right_ch_it.add_after_then_move(child); | |
| 380 } | |
| 381 } | |
| 382 } | |
| 383 } | |
| 384 close_chopped_cfragments(&left_frags, &left_ch, pitch_error, left_it); | |
| 385 close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it); | |
| 386 ASSERT_HOST(left_ch.empty() && right_ch.empty()); | |
| 387 // No children left. | |
| 388 delete srcline; // Smashed up. | |
| 389 } else { | |
| 390 // Chop failed. Just use middle coord. | |
| 391 if (srcbox.left() + srcbox.right() <= chop_coord * 2) { | |
| 392 left_it->add_after_then_move(srcline); // Stick whole in left. | |
| 393 } else { | |
| 394 right_it->add_before_stay_put(srcline); | |
| 395 } | |
| 396 } | |
| 397 } | |
| 398 } | |
| 399 | |
| 400 /********************************************************************** | |
| 401 * fixed_chop_coutline | |
| 402 * | |
| 403 * Chop the given coutline (if necessary) placing the fragments which | |
| 404 * fall either side of the chop line into the appropriate list. | |
| 405 * If the coutline lies too heavily to one side to chop, false is returned. | |
| 406 **********************************************************************/ | |
| 407 | |
| 408 static bool fixed_chop_coutline( // chop the outline | |
| 409 C_OUTLINE *srcline, // source outline | |
| 410 int16_t chop_coord, // place to chop | |
| 411 float pitch_error, // allowed deviation | |
| 412 C_OUTLINE_FRAG_LIST *left_frags, // left half of chop | |
| 413 C_OUTLINE_FRAG_LIST *right_frags // right half of chop | |
| 414 ) { | |
| 415 bool first_frag; // fragment | |
| 416 int16_t left_edge; // of outline | |
| 417 int16_t startindex; // in first fragment | |
| 418 int32_t length; // of outline | |
| 419 int16_t stepindex; // into outline | |
| 420 int16_t head_index; // start of fragment | |
| 421 ICOORD head_pos; // start of fragment | |
| 422 int16_t tail_index; // end of fragment | |
| 423 ICOORD tail_pos; // end of fragment | |
| 424 ICOORD pos; // current point | |
| 425 int16_t first_index = 0; // first tail | |
| 426 ICOORD first_pos; // first tail | |
| 427 | |
| 428 length = srcline->pathlength(); | |
| 429 pos = srcline->start_pos(); | |
| 430 left_edge = pos.x(); | |
| 431 tail_index = 0; | |
| 432 tail_pos = pos; | |
| 433 for (stepindex = 0; stepindex < length; stepindex++) { | |
| 434 if (pos.x() < left_edge) { | |
| 435 left_edge = pos.x(); | |
| 436 tail_index = stepindex; | |
| 437 tail_pos = pos; | |
| 438 } | |
| 439 pos += srcline->step(stepindex); | |
| 440 } | |
| 441 if (left_edge >= chop_coord - pitch_error) { | |
| 442 return false; // not worth it | |
| 443 } | |
| 444 | |
| 445 startindex = tail_index; | |
| 446 first_frag = true; | |
| 447 head_index = tail_index; | |
| 448 head_pos = tail_pos; | |
| 449 do { | |
| 450 do { | |
| 451 tail_pos += srcline->step(tail_index); | |
| 452 tail_index++; | |
| 453 if (tail_index == length) { | |
| 454 tail_index = 0; | |
| 455 } | |
| 456 } while (tail_pos.x() != chop_coord && tail_index != startindex); | |
| 457 if (tail_index == startindex) { | |
| 458 if (first_frag) { | |
| 459 return false; // doesn't cross line | |
| 460 } else { | |
| 461 break; | |
| 462 } | |
| 463 } | |
| 464 ASSERT_HOST(head_index != tail_index); | |
| 465 if (!first_frag) { | |
| 466 save_chop_cfragment(head_index, head_pos, tail_index, tail_pos, srcline, left_frags); | |
| 467 } else { | |
| 468 first_index = tail_index; | |
| 469 first_pos = tail_pos; | |
| 470 first_frag = false; | |
| 471 } | |
| 472 while (srcline->step(tail_index).x() == 0) { | |
| 473 tail_pos += srcline->step(tail_index); | |
| 474 tail_index++; | |
| 475 if (tail_index == length) { | |
| 476 tail_index = 0; | |
| 477 } | |
| 478 } | |
| 479 head_index = tail_index; | |
| 480 head_pos = tail_pos; | |
| 481 while (srcline->step(tail_index).x() > 0) { | |
| 482 do { | |
| 483 tail_pos += srcline->step(tail_index); | |
| 484 tail_index++; | |
| 485 if (tail_index == length) { | |
| 486 tail_index = 0; | |
| 487 } | |
| 488 } while (tail_pos.x() != chop_coord); | |
| 489 ASSERT_HOST(head_index != tail_index); | |
| 490 save_chop_cfragment(head_index, head_pos, tail_index, tail_pos, srcline, right_frags); | |
| 491 while (srcline->step(tail_index).x() == 0) { | |
| 492 tail_pos += srcline->step(tail_index); | |
| 493 tail_index++; | |
| 494 if (tail_index == length) { | |
| 495 tail_index = 0; | |
| 496 } | |
| 497 } | |
| 498 head_index = tail_index; | |
| 499 head_pos = tail_pos; | |
| 500 } | |
| 501 } while (tail_index != startindex); | |
| 502 save_chop_cfragment(head_index, head_pos, first_index, first_pos, srcline, left_frags); | |
| 503 return true; // did some chopping | |
| 504 } | |
| 505 | |
| 506 /********************************************************************** | |
| 507 * save_chop_cfragment | |
| 508 * | |
| 509 * Store the given fragment in the given fragment list. | |
| 510 **********************************************************************/ | |
| 511 | |
| 512 static void save_chop_cfragment( // chop the outline | |
| 513 int16_t head_index, // head of fragment | |
| 514 ICOORD head_pos, // head of fragment | |
| 515 int16_t tail_index, // tail of fragment | |
| 516 ICOORD tail_pos, // tail of fragment | |
| 517 C_OUTLINE *srcline, // source of edgesteps | |
| 518 C_OUTLINE_FRAG_LIST *frags // fragment list | |
| 519 ) { | |
| 520 int16_t jump; // gap across end | |
| 521 int16_t stepcount; // total steps | |
| 522 C_OUTLINE_FRAG *head; // head of fragment | |
| 523 C_OUTLINE_FRAG *tail; // tail of fragment | |
| 524 int16_t tail_y; // ycoord of tail | |
| 525 | |
| 526 ASSERT_HOST(tail_pos.x() == head_pos.x()); | |
| 527 ASSERT_HOST(tail_index != head_index); | |
| 528 stepcount = tail_index - head_index; | |
| 529 if (stepcount < 0) { | |
| 530 stepcount += srcline->pathlength(); | |
| 531 } | |
| 532 jump = tail_pos.y() - head_pos.y(); | |
| 533 if (jump < 0) { | |
| 534 jump = -jump; | |
| 535 } | |
| 536 if (jump == stepcount) { | |
| 537 return; // its a nop | |
| 538 } | |
| 539 tail_y = tail_pos.y(); | |
| 540 head = new C_OUTLINE_FRAG(head_pos, tail_pos, srcline, head_index, tail_index); | |
| 541 tail = new C_OUTLINE_FRAG(head, tail_y); | |
| 542 head->other_end = tail; | |
| 543 add_frag_to_list(head, frags); | |
| 544 add_frag_to_list(tail, frags); | |
| 545 } | |
| 546 | |
| 547 /********************************************************************** | |
| 548 * C_OUTLINE_FRAG::C_OUTLINE_FRAG | |
| 549 * | |
| 550 * Constructors for C_OUTLINE_FRAG. | |
| 551 **********************************************************************/ | |
| 552 | |
| 553 C_OUTLINE_FRAG::C_OUTLINE_FRAG( // record fragment | |
| 554 ICOORD start_pt, // start coord | |
| 555 ICOORD end_pt, // end coord | |
| 556 C_OUTLINE *outline, // source of steps | |
| 557 int16_t start_index, int16_t end_index) { | |
| 558 start = start_pt; | |
| 559 end = end_pt; | |
| 560 ycoord = start_pt.y(); | |
| 561 stepcount = end_index - start_index; | |
| 562 if (stepcount < 0) { | |
| 563 stepcount += outline->pathlength(); | |
| 564 } | |
| 565 ASSERT_HOST(stepcount > 0); | |
| 566 steps = new DIR128[stepcount]; | |
| 567 if (end_index > start_index) { | |
| 568 for (int i = start_index; i < end_index; ++i) { | |
| 569 steps[i - start_index] = outline->step_dir(i); | |
| 570 } | |
| 571 } else { | |
| 572 int len = outline->pathlength(); | |
| 573 int i = start_index; | |
| 574 for (; i < len; ++i) { | |
| 575 steps[i - start_index] = outline->step_dir(i); | |
| 576 } | |
| 577 if (end_index > 0) { | |
| 578 for (; i < end_index + len; ++i) { | |
| 579 steps[i - start_index] = outline->step_dir(i - len); | |
| 580 } | |
| 581 } | |
| 582 } | |
| 583 other_end = nullptr; | |
| 584 delete close(); | |
| 585 } | |
| 586 | |
| 587 C_OUTLINE_FRAG::C_OUTLINE_FRAG( // record fragment | |
| 588 C_OUTLINE_FRAG *head, // other end | |
| 589 int16_t tail_y) { | |
| 590 ycoord = tail_y; | |
| 591 other_end = head; | |
| 592 start = head->start; | |
| 593 end = head->end; | |
| 594 steps = nullptr; | |
| 595 stepcount = 0; | |
| 596 } | |
| 597 | |
| 598 /********************************************************************** | |
| 599 * add_frag_to_list | |
| 600 * | |
| 601 * Insert the fragment in the list at the appropriate place to keep | |
| 602 * them in ascending ycoord order. | |
| 603 **********************************************************************/ | |
| 604 | |
| 605 static void add_frag_to_list( // ordered add | |
| 606 C_OUTLINE_FRAG *frag, // fragment to add | |
| 607 C_OUTLINE_FRAG_LIST *frags // fragment list | |
| 608 ) { | |
| 609 // output list | |
| 610 C_OUTLINE_FRAG_IT frag_it = frags; | |
| 611 | |
| 612 if (!frags->empty()) { | |
| 613 for (frag_it.mark_cycle_pt(); !frag_it.cycled_list(); frag_it.forward()) { | |
| 614 if (frag_it.data()->ycoord > frag->ycoord || | |
| 615 (frag_it.data()->ycoord == frag->ycoord && frag->other_end->ycoord < frag->ycoord)) { | |
| 616 frag_it.add_before_then_move(frag); | |
| 617 return; | |
| 618 } | |
| 619 } | |
| 620 } | |
| 621 frag_it.add_to_end(frag); | |
| 622 } | |
| 623 | |
| 624 /********************************************************************** | |
| 625 * close_chopped_cfragments | |
| 626 * | |
| 627 * Clear the given list of fragments joining them up into outlines. | |
| 628 * Each outline made soaks up any of the child outlines which it encloses. | |
| 629 **********************************************************************/ | |
| 630 | |
| 631 static void close_chopped_cfragments( // chop the outline | |
| 632 C_OUTLINE_FRAG_LIST *frags, // list to clear | |
| 633 C_OUTLINE_LIST *children, // potential children | |
| 634 float pitch_error, // allowed shrinkage | |
| 635 C_OUTLINE_IT *dest_it // output list | |
| 636 ) { | |
| 637 // iterator | |
| 638 C_OUTLINE_FRAG_IT frag_it = frags; | |
| 639 C_OUTLINE_FRAG *bottom_frag; // bottom of cut | |
| 640 C_OUTLINE_FRAG *top_frag; // top of cut | |
| 641 C_OUTLINE *outline; // new outline | |
| 642 C_OUTLINE *child; // current child | |
| 643 C_OUTLINE_IT child_it = children; | |
| 644 C_OUTLINE_IT olchild_it; // children of outline | |
| 645 | |
| 646 while (!frag_it.empty()) { | |
| 647 frag_it.move_to_first(); | |
| 648 // get bottom one | |
| 649 bottom_frag = frag_it.extract(); | |
| 650 frag_it.forward(); | |
| 651 top_frag = frag_it.data(); // look at next | |
| 652 if ((bottom_frag->steps == nullptr && top_frag->steps == nullptr) || | |
| 653 (bottom_frag->steps != nullptr && top_frag->steps != nullptr)) { | |
| 654 if (frag_it.data_relative(1)->ycoord == top_frag->ycoord) { | |
| 655 frag_it.forward(); | |
| 656 } | |
| 657 } | |
| 658 top_frag = frag_it.extract(); | |
| 659 if (top_frag->other_end != bottom_frag) { | |
| 660 outline = join_chopped_fragments(bottom_frag, top_frag); | |
| 661 ASSERT_HOST(outline == nullptr); | |
| 662 } else { | |
| 663 outline = join_chopped_fragments(bottom_frag, top_frag); | |
| 664 if (outline != nullptr) { | |
| 665 olchild_it.set_to_list(outline->child()); | |
| 666 for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { | |
| 667 child = child_it.data(); | |
| 668 if (*child < *outline) { | |
| 669 olchild_it.add_to_end(child_it.extract()); | |
| 670 } | |
| 671 } | |
| 672 if (outline->bounding_box().width() > pitch_error) { | |
| 673 dest_it->add_after_then_move(outline); | |
| 674 } else { | |
| 675 delete outline; // Make it disappear. | |
| 676 } | |
| 677 } | |
| 678 } | |
| 679 } | |
| 680 while (!child_it.empty()) { | |
| 681 dest_it->add_after_then_move(child_it.extract()); | |
| 682 child_it.forward(); | |
| 683 } | |
| 684 } | |
| 685 | |
| 686 /********************************************************************** | |
| 687 * join_chopped_fragments | |
| 688 * | |
| 689 * Join the two lists of POLYPTs such that neither OUTLINE_FRAG | |
| 690 * operand keeps responsibility for the fragment. | |
| 691 **********************************************************************/ | |
| 692 | |
| 693 static C_OUTLINE *join_chopped_fragments( // join pieces | |
| 694 C_OUTLINE_FRAG *bottom, // bottom of cut | |
| 695 C_OUTLINE_FRAG *top // top of cut | |
| 696 ) { | |
| 697 C_OUTLINE *outline; // closed loop | |
| 698 | |
| 699 if (bottom->other_end == top) { | |
| 700 if (bottom->steps == nullptr) { | |
| 701 outline = top->close(); // turn to outline | |
| 702 } else { | |
| 703 outline = bottom->close(); | |
| 704 } | |
| 705 delete top; | |
| 706 delete bottom; | |
| 707 return outline; | |
| 708 } | |
| 709 if (bottom->steps == nullptr) { | |
| 710 ASSERT_HOST(top->steps != nullptr); | |
| 711 join_segments(bottom->other_end, top); | |
| 712 } else { | |
| 713 ASSERT_HOST(top->steps == nullptr); | |
| 714 join_segments(top->other_end, bottom); | |
| 715 } | |
| 716 top->other_end->other_end = bottom->other_end; | |
| 717 bottom->other_end->other_end = top->other_end; | |
| 718 delete bottom; | |
| 719 delete top; | |
| 720 return nullptr; | |
| 721 } | |
| 722 | |
| 723 /********************************************************************** | |
| 724 * join_segments | |
| 725 * | |
| 726 * Join the two edgestep fragments such that the second comes after | |
| 727 * the first and the gap between them is closed. | |
| 728 **********************************************************************/ | |
| 729 | |
| 730 static void join_segments( // join pieces | |
| 731 C_OUTLINE_FRAG *bottom, // bottom of cut | |
| 732 C_OUTLINE_FRAG *top // top of cut | |
| 733 ) { | |
| 734 DIR128 *steps; // new steps | |
| 735 int32_t stepcount; // no of steps | |
| 736 int16_t fake_count; // fake steps | |
| 737 DIR128 fake_step; // step entry | |
| 738 | |
| 739 ASSERT_HOST(bottom->end.x() == top->start.x()); | |
| 740 fake_count = top->start.y() - bottom->end.y(); | |
| 741 if (fake_count < 0) { | |
| 742 fake_count = -fake_count; | |
| 743 fake_step = 32; | |
| 744 } else { | |
| 745 fake_step = 96; | |
| 746 } | |
| 747 | |
| 748 stepcount = bottom->stepcount + fake_count + top->stepcount; | |
| 749 steps = new DIR128[stepcount]; | |
| 750 memmove(steps, bottom->steps, bottom->stepcount); | |
| 751 memset(steps + bottom->stepcount, fake_step.get_dir(), fake_count); | |
| 752 memmove(steps + bottom->stepcount + fake_count, top->steps, top->stepcount); | |
| 753 delete[] bottom->steps; | |
| 754 bottom->steps = steps; | |
| 755 bottom->stepcount = stepcount; | |
| 756 bottom->end = top->end; | |
| 757 bottom->other_end->end = top->end; | |
| 758 } | |
| 759 | |
| 760 /********************************************************************** | |
| 761 * C_OUTLINE_FRAG::close | |
| 762 * | |
| 763 * Join the ends of this fragment and turn it into an outline. | |
| 764 **********************************************************************/ | |
| 765 | |
| 766 C_OUTLINE *C_OUTLINE_FRAG::close() { // join pieces | |
| 767 DIR128 *new_steps; // new steps | |
| 768 int32_t new_stepcount; // no of steps | |
| 769 int16_t fake_count; // fake steps | |
| 770 DIR128 fake_step; // step entry | |
| 771 | |
| 772 ASSERT_HOST(start.x() == end.x()); | |
| 773 fake_count = start.y() - end.y(); | |
| 774 if (fake_count < 0) { | |
| 775 fake_count = -fake_count; | |
| 776 fake_step = 32; | |
| 777 } else { | |
| 778 fake_step = 96; | |
| 779 } | |
| 780 | |
| 781 new_stepcount = stepcount + fake_count; | |
| 782 if (new_stepcount > C_OUTLINE::kMaxOutlineLength) { | |
| 783 return nullptr; // Can't join them | |
| 784 } | |
| 785 new_steps = new DIR128[new_stepcount]; | |
| 786 memmove(new_steps, steps, stepcount); | |
| 787 memset(new_steps + stepcount, fake_step.get_dir(), fake_count); | |
| 788 auto *result = new C_OUTLINE(start, new_steps, new_stepcount); | |
| 789 delete[] new_steps; | |
| 790 return result; | |
| 791 } | |
| 792 | |
| 793 /********************************************************************** | |
| 794 * C_OUTLINE_FRAG::operator= | |
| 795 * | |
| 796 * Copy this fragment. | |
| 797 **********************************************************************/ | |
| 798 | |
| 799 // join pieces | |
| 800 C_OUTLINE_FRAG &C_OUTLINE_FRAG::operator=(const C_OUTLINE_FRAG &src // fragment to copy | |
| 801 ) { | |
| 802 delete[] steps; | |
| 803 | |
| 804 stepcount = src.stepcount; | |
| 805 steps = new DIR128[stepcount]; | |
| 806 memmove(steps, src.steps, stepcount); | |
| 807 start = src.start; | |
| 808 end = src.end; | |
| 809 ycoord = src.ycoord; | |
| 810 return *this; | |
| 811 } | |
| 812 | |
| 813 } // namespace tesseract |
