Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/underlin.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: underlin.cpp (Formerly undrline.c) | |
| 3 * Description: Code to chop blobs apart from underlines. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1994, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #include "underlin.h" | |
| 20 | |
| 21 namespace tesseract { | |
| 22 | |
// Fraction of a row's xheight used as the baseline offset when looking for
// blobs inside an underline (multiplied by row->xheight in
// restore_underlined_blobs).
double_VAR(textord_underline_offset, 0.1, "Fraction of x to ignore");
// Switch for underline restoration; it is not read anywhere in this file,
// so presumably the caller of restore_underlined_blobs checks it.
BOOL_VAR(textord_restore_underlines, true, "Chop underlines & put back");
| 25 | |
| 26 /********************************************************************** | |
| 27 * restore_underlined_blobs | |
| 28 * | |
| 29 * Find underlined blobs and put them back in the row. | |
| 30 **********************************************************************/ | |
| 31 | |
| 32 void restore_underlined_blobs( // get chop points | |
| 33 TO_BLOCK *block // block to do | |
| 34 ) { | |
| 35 int16_t chop_coord; // chop boundary | |
| 36 TBOX blob_box; // of underline | |
| 37 BLOBNBOX *u_line; // underline bit | |
| 38 TO_ROW *row; // best row for blob | |
| 39 ICOORDELT_LIST chop_cells; // blobs to cut out | |
| 40 // real underlines | |
| 41 BLOBNBOX_LIST residual_underlines; | |
| 42 C_OUTLINE_LIST left_coutlines; | |
| 43 C_OUTLINE_LIST right_coutlines; | |
| 44 ICOORDELT_IT cell_it = &chop_cells; | |
| 45 // under lines | |
| 46 BLOBNBOX_IT under_it = &block->underlines; | |
| 47 BLOBNBOX_IT ru_it = &residual_underlines; | |
| 48 | |
| 49 if (block->get_rows()->empty()) { | |
| 50 return; // Don't crash if there are no rows. | |
| 51 } | |
| 52 for (under_it.mark_cycle_pt(); !under_it.cycled_list(); under_it.forward()) { | |
| 53 u_line = under_it.extract(); | |
| 54 blob_box = u_line->bounding_box(); | |
| 55 row = most_overlapping_row(block->get_rows(), u_line); | |
| 56 if (row == nullptr) { | |
| 57 return; // Don't crash if there is no row. | |
| 58 } | |
| 59 find_underlined_blobs(u_line, &row->baseline, row->xheight, | |
| 60 row->xheight * textord_underline_offset, &chop_cells); | |
| 61 cell_it.set_to_list(&chop_cells); | |
| 62 for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) { | |
| 63 chop_coord = cell_it.data()->x(); | |
| 64 if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) { | |
| 65 split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines, | |
| 66 &right_coutlines); | |
| 67 if (!left_coutlines.empty()) { | |
| 68 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); | |
| 69 } | |
| 70 chop_coord = cell_it.data()->y(); | |
| 71 split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines, | |
| 72 &right_coutlines); | |
| 73 if (!left_coutlines.empty()) { | |
| 74 row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines))); | |
| 75 } | |
| 76 u_line = nullptr; // no more blobs to add | |
| 77 } | |
| 78 delete cell_it.extract(); | |
| 79 } | |
| 80 if (!right_coutlines.empty()) { | |
| 81 split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, &left_coutlines, | |
| 82 &right_coutlines); | |
| 83 if (!left_coutlines.empty()) { | |
| 84 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); | |
| 85 } | |
| 86 } | |
| 87 delete u_line; | |
| 88 } | |
| 89 if (!ru_it.empty()) { | |
| 90 ru_it.move_to_first(); | |
| 91 for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) { | |
| 92 under_it.add_after_then_move(ru_it.extract()); | |
| 93 } | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 /********************************************************************** | |
| 98 * most_overlapping_row | |
| 99 * | |
| 100 * Return the row which most overlaps the blob. | |
| 101 **********************************************************************/ | |
| 102 | |
| 103 TO_ROW *most_overlapping_row( // find best row | |
| 104 TO_ROW_LIST *rows, // list of rows | |
| 105 BLOBNBOX *blob // blob to place | |
| 106 ) { | |
| 107 int16_t x = (blob->bounding_box().left() + blob->bounding_box().right()) / 2; | |
| 108 TO_ROW_IT row_it = rows; // row iterator | |
| 109 TO_ROW *row; // current row | |
| 110 TO_ROW *best_row; // output row | |
| 111 float overlap; // of blob & row | |
| 112 float bestover; // best overlap | |
| 113 | |
| 114 best_row = nullptr; | |
| 115 bestover = static_cast<float>(-INT32_MAX); | |
| 116 if (row_it.empty()) { | |
| 117 return nullptr; | |
| 118 } | |
| 119 row = row_it.data(); | |
| 120 row_it.mark_cycle_pt(); | |
| 121 while (row->baseline.y(x) + row->descdrop > blob->bounding_box().top() && !row_it.cycled_list()) { | |
| 122 best_row = row; | |
| 123 bestover = blob->bounding_box().top() - row->baseline.y(x) + row->descdrop; | |
| 124 row_it.forward(); | |
| 125 row = row_it.data(); | |
| 126 } | |
| 127 while (row->baseline.y(x) + row->xheight + row->ascrise >= blob->bounding_box().bottom() && | |
| 128 !row_it.cycled_list()) { | |
| 129 overlap = row->baseline.y(x) + row->xheight + row->ascrise; | |
| 130 if (blob->bounding_box().top() < overlap) { | |
| 131 overlap = blob->bounding_box().top(); | |
| 132 } | |
| 133 if (blob->bounding_box().bottom() > row->baseline.y(x) + row->descdrop) { | |
| 134 overlap -= blob->bounding_box().bottom(); | |
| 135 } else { | |
| 136 overlap -= row->baseline.y(x) + row->descdrop; | |
| 137 } | |
| 138 if (overlap > bestover) { | |
| 139 bestover = overlap; | |
| 140 best_row = row; | |
| 141 } | |
| 142 row_it.forward(); | |
| 143 row = row_it.data(); | |
| 144 } | |
| 145 if (bestover < 0 && | |
| 146 row->baseline.y(x) + row->xheight + row->ascrise - blob->bounding_box().bottom() > bestover) { | |
| 147 best_row = row; | |
| 148 } | |
| 149 return best_row; | |
| 150 } | |
| 151 | |
| 152 /********************************************************************** | |
| 153 * find_underlined_blobs | |
| 154 * | |
| 155 * Find the start and end coords of blobs in the underline. | |
| 156 **********************************************************************/ | |
| 157 | |
| 158 void find_underlined_blobs( // get chop points | |
| 159 BLOBNBOX *u_line, // underlined unit | |
| 160 QSPLINE *baseline, // actual baseline | |
| 161 float xheight, // height of line | |
| 162 float baseline_offset, // amount to shrinke it | |
| 163 ICOORDELT_LIST *chop_cells // places to chop | |
| 164 ) { | |
| 165 ICOORD blob_chop; // sides of blob | |
| 166 TBOX blob_box = u_line->bounding_box(); | |
| 167 // cell iterator | |
| 168 ICOORDELT_IT cell_it = chop_cells; | |
| 169 STATS upper_proj(blob_box.left(), blob_box.right()); | |
| 170 STATS middle_proj(blob_box.left(), blob_box.right()); | |
| 171 STATS lower_proj(blob_box.left(), blob_box.right()); | |
| 172 C_OUTLINE_IT out_it; // outlines of blob | |
| 173 | |
| 174 ASSERT_HOST(u_line->cblob() != nullptr); | |
| 175 | |
| 176 out_it.set_to_list(u_line->cblob()->out_list()); | |
| 177 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { | |
| 178 vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, &lower_proj, | |
| 179 &middle_proj, &upper_proj); | |
| 180 } | |
| 181 | |
| 182 for (auto x = blob_box.left(); x < blob_box.right(); x++) { | |
| 183 if (middle_proj.pile_count(x) > 0) { | |
| 184 auto y = x + 1; | |
| 185 for (; y < blob_box.right() && middle_proj.pile_count(y) > 0; y++) { | |
| 186 ; | |
| 187 } | |
| 188 blob_chop = ICOORD(x, y); | |
| 189 cell_it.add_after_then_move(new ICOORDELT(blob_chop)); | |
| 190 x = y; | |
| 191 } | |
| 192 } | |
| 193 } | |
| 194 | |
| 195 /********************************************************************** | |
| 196 * vertical_cunderline_projection | |
| 197 * | |
| 198 * Compute the vertical projection of an outline from its outlines | |
| 199 * and add to the given STATS. | |
| 200 **********************************************************************/ | |
| 201 | |
| 202 void vertical_cunderline_projection( // project outlines | |
| 203 C_OUTLINE *outline, // outline to project | |
| 204 QSPLINE *baseline, // actual baseline | |
| 205 float xheight, // height of line | |
| 206 float baseline_offset, // amount to shrinke it | |
| 207 STATS *lower_proj, // below baseline | |
| 208 STATS *middle_proj, // centre region | |
| 209 STATS *upper_proj // top region | |
| 210 ) { | |
| 211 ICOORD pos; // current point | |
| 212 ICOORD step; // edge step | |
| 213 int16_t lower_y, upper_y; // region limits | |
| 214 C_OUTLINE_IT out_it = outline->child(); | |
| 215 | |
| 216 pos = outline->start_pos(); | |
| 217 int16_t length = outline->pathlength(); | |
| 218 for (int16_t stepindex = 0; stepindex < length; stepindex++) { | |
| 219 step = outline->step(stepindex); | |
| 220 if (step.x() > 0) { | |
| 221 lower_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + 0.5)); | |
| 222 upper_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + xheight + 0.5)); | |
| 223 if (pos.y() >= lower_y) { | |
| 224 lower_proj->add(pos.x(), -lower_y); | |
| 225 if (pos.y() >= upper_y) { | |
| 226 middle_proj->add(pos.x(), lower_y - upper_y); | |
| 227 upper_proj->add(pos.x(), upper_y - pos.y()); | |
| 228 } else { | |
| 229 middle_proj->add(pos.x(), lower_y - pos.y()); | |
| 230 } | |
| 231 } else { | |
| 232 lower_proj->add(pos.x(), -pos.y()); | |
| 233 } | |
| 234 } else if (step.x() < 0) { | |
| 235 lower_y = static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + 0.5)); | |
| 236 upper_y = | |
| 237 static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + xheight + 0.5)); | |
| 238 if (pos.y() >= lower_y) { | |
| 239 lower_proj->add(pos.x() - 1, lower_y); | |
| 240 if (pos.y() >= upper_y) { | |
| 241 middle_proj->add(pos.x() - 1, upper_y - lower_y); | |
| 242 upper_proj->add(pos.x() - 1, pos.y() - upper_y); | |
| 243 } else { | |
| 244 middle_proj->add(pos.x() - 1, pos.y() - lower_y); | |
| 245 } | |
| 246 } else { | |
| 247 lower_proj->add(pos.x() - 1, pos.y()); | |
| 248 } | |
| 249 } | |
| 250 pos += step; | |
| 251 } | |
| 252 | |
| 253 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { | |
| 254 vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, lower_proj, | |
| 255 middle_proj, upper_proj); | |
| 256 } | |
| 257 } | |
| 258 | |
| 259 } // namespace tesseract |
