diff mupdf-source/thirdparty/tesseract/src/textord/tabvector.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/textord/tabvector.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,998 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tabvector.cpp
+// Description: Class to hold a near-vertical vector representing a tab-stop.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "blobbox.h"
+#include "colfind.h"
+#include "colpartitionset.h"
+#include "detlinefit.h"
+#include "helpers.h" // for IntCastRounded
+#include "statistc.h"
+#include "tabvector.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Multiple of height used as a gutter for evaluation search.
+const int kGutterMultiple = 4;
+// Multiple of neighbour gap that we expect the gutter gap to be at minimum.
+const int kGutterToNeighbourRatio = 3;
+// Pixel distance for tab vectors to be considered the same.
+const int kSimilarVectorDist = 10;
+// Pixel distance for ragged tab vectors to be considered the same if there
+// is nothing in the overlap box
+const int kSimilarRaggedDist = 50;
+// Max multiple of height to allow filling in between blobs when evaluating.
+const int kMaxFillinMultiple = 11;
+// Min fraction of mean gutter size to allow a gutter on a good tab blob.
+const double kMinGutterFraction = 0.5;
+// Multiple of 1/n lines as a minimum gutter in evaluation.
+const double kLineCountReciprocal = 4.0;
+// Constant add-on for minimum gutter for aligned tabs.
+const double kMinAlignedGutter = 0.25;
+// Constant add-on for minimum gutter for ragged tabs.
+const double kMinRaggedGutter = 1.5;
+
+double_VAR(textord_tabvector_vertical_gap_fraction, 0.5,
+           "max fraction of mean blob width allowed for vertical gaps in "
+           "vertical text");
+
+double_VAR(textord_tabvector_vertical_box_ratio, 0.5,
+           "Fraction of box matches required to declare a line vertical");
+
+// Create a constraint for the top or bottom of this TabVector.
+void TabConstraint::CreateConstraint(TabVector *vector, bool is_top) {
+  auto *constraint = new TabConstraint(vector, is_top);
+  auto *constraints = new TabConstraint_LIST;
+  TabConstraint_IT it(constraints);
+  it.add_to_end(constraint);
+  if (is_top) {
+    vector->set_top_constraints(constraints);
+  } else {
+    vector->set_bottom_constraints(constraints);
+  }
+}
+
+// Test to see if the constraints are compatible enough to merge.
+bool TabConstraint::CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2) {
+  if (list1 == list2) {
+    return false;
+  }
+  int y_min = -INT32_MAX;
+  int y_max = INT32_MAX;
+  if (textord_debug_tabfind > 3) {
+    tprintf("Testing constraint compatibility\n");
+  }
+  GetConstraints(list1, &y_min, &y_max);
+  GetConstraints(list2, &y_min, &y_max);
+  if (textord_debug_tabfind > 3) {
+    tprintf("Resulting range = [%d,%d]\n", y_min, y_max);
+  }
+  return y_max >= y_min;
+}
+
+// Merge the lists of constraints and update the TabVector pointers.
+// The second list is deleted.
+void TabConstraint::MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2) {
+  if (list1 == list2) {
+    return;
+  }
+  TabConstraint_IT it(list2);
+  if (textord_debug_tabfind > 3) {
+    tprintf("Merging constraints\n");
+  }
+  // The vectors of all constraints on list2 are now going to be on list1.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabConstraint *constraint = it.data();
+    if (textord_debug_tabfind > 3) {
+      constraint->vector_->Print("Merge");
+    }
+    if (constraint->is_top_) {
+      constraint->vector_->set_top_constraints(list1);
+    } else {
+      constraint->vector_->set_bottom_constraints(list1);
+    }
+  }
+  it = list1;
+  it.add_list_before(list2);
+  delete list2;
+}
+
+// Set all the tops and bottoms as appropriate to a mean of the
+// constrained range. Delete all the constraints and list.
+void TabConstraint::ApplyConstraints(TabConstraint_LIST *constraints) {
+  int y_min = -INT32_MAX;
+  int y_max = INT32_MAX;
+  GetConstraints(constraints, &y_min, &y_max);
+  int y = (y_min + y_max) / 2;
+  TabConstraint_IT it(constraints);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabConstraint *constraint = it.data();
+    TabVector *v = constraint->vector_;
+    if (constraint->is_top_) {
+      v->SetYEnd(y);
+      v->set_top_constraints(nullptr);
+    } else {
+      v->SetYStart(y);
+      v->set_bottom_constraints(nullptr);
+    }
+  }
+  delete constraints;
+}
+
+TabConstraint::TabConstraint(TabVector *vector, bool is_top) : vector_(vector), is_top_(is_top) {
+  if (is_top) {
+    y_min_ = vector->endpt().y();
+    y_max_ = vector->extended_ymax();
+  } else {
+    y_max_ = vector->startpt().y();
+    y_min_ = vector->extended_ymin();
+  }
+}
+
+// Get the max of the mins and the min of the maxes.
+void TabConstraint::GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max) {
+  TabConstraint_IT it(constraints);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabConstraint *constraint = it.data();
+    if (textord_debug_tabfind > 3) {
+      tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
+      constraint->vector_->Print(" for");
+    }
+    *y_min = std::max(*y_min, constraint->y_min_);
+    *y_max = std::min(*y_max, constraint->y_max_);
+  }
+}
+
+// The constructor is private. See the bottom of the file...
+
+// Public factory to build a TabVector from a list of boxes.
+// The TabVector will be of the given alignment type.
+// The input vertical vector is used in fitting, and the output
+// vertical_x, vertical_y have the resulting line vector added to them
+// if the alignment is not ragged.
+// The extended_start_y and extended_end_y are the maximum possible
+// extension to the line segment that can be used to align with others.
+// The input CLIST of BLOBNBOX good_points is consumed and taken over.
+TabVector *TabVector::FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y,
+                                int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x,
+                                int *vertical_y) {
+  auto *vector = new TabVector(extended_start_y, extended_end_y, alignment, good_points);
+  if (!vector->Fit(vertical, false)) {
+    delete vector;
+    return nullptr;
+  }
+  if (!vector->IsRagged()) {
+    vertical = vector->endpt_ - vector->startpt_;
+    int weight = vector->BoxCount();
+    *vertical_x += vertical.x() * weight;
+    *vertical_y += vertical.y() * weight;
+  }
+  return vector;
+}
+
+// Build a ragged TabVector by copying another's direction, shifting it
+// to match the given blob, and making its initial extent the height
+// of the blob, but its extended bounds from the bounds of the original.
+TabVector::TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew,
+                     BLOBNBOX *blob)
+    : extended_ymin_(src.extended_ymin_)
+    , extended_ymax_(src.extended_ymax_)
+    , needs_refit_(true)
+    , needs_evaluation_(true)
+    , alignment_(alignment) {
+  BLOBNBOX_C_IT it(&boxes_);
+  it.add_to_end(blob);
+  TBOX box = blob->bounding_box();
+  if (IsLeftTab()) {
+    startpt_ = box.botleft();
+    endpt_ = box.topleft();
+  } else {
+    startpt_ = box.botright();
+    endpt_ = box.topright();
+  }
+  sort_key_ =
+      SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2);
+  if (textord_debug_tabfind > 3) {
+    Print("Constructed a new tab vector:");
+  }
+}
+
+// Copies basic attributes of a tab vector for simple operations.
+// Copies things such startpt, endpt, range.
+// Does not copy things such as partners, boxes, or constraints.
+// This is useful if you only need vector information for processing, such
+// as in the table detection code.
+TabVector *TabVector::ShallowCopy() const {
+  auto *copy = new TabVector();
+  copy->startpt_ = startpt_;
+  copy->endpt_ = endpt_;
+  copy->alignment_ = alignment_;
+  copy->extended_ymax_ = extended_ymax_;
+  copy->extended_ymin_ = extended_ymin_;
+  copy->intersects_other_lines_ = intersects_other_lines_;
+  return copy;
+}
+
+// Extend this vector to include the supplied blob if it doesn't
+// already have it.
+void TabVector::ExtendToBox(BLOBNBOX *new_blob) {
+  TBOX new_box = new_blob->bounding_box();
+  BLOBNBOX_C_IT it(&boxes_);
+  if (!it.empty()) {
+    BLOBNBOX *blob = it.data();
+    TBOX box = blob->bounding_box();
+    while (!it.at_last() && box.top() <= new_box.top()) {
+      if (blob == new_blob) {
+        return; // We have it already.
+      }
+      it.forward();
+      blob = it.data();
+      box = blob->bounding_box();
+    }
+    if (box.top() >= new_box.top()) {
+      it.add_before_stay_put(new_blob);
+      needs_refit_ = true;
+      return;
+    }
+  }
+  needs_refit_ = true;
+  it.add_after_stay_put(new_blob);
+}
+
+// Set the ycoord of the start and move the xcoord to match.
+void TabVector::SetYStart(int start_y) {
+  startpt_.set_x(XAtY(start_y));
+  startpt_.set_y(start_y);
+}
+// Set the ycoord of the end and move the xcoord to match.
+void TabVector::SetYEnd(int end_y) {
+  endpt_.set_x(XAtY(end_y));
+  endpt_.set_y(end_y);
+}
+
+// Rotate the ends by the given vector. Auto flip start and end if needed.
+void TabVector::Rotate(const FCOORD &rotation) {
+  startpt_.rotate(rotation);
+  endpt_.rotate(rotation);
+  int dx = endpt_.x() - startpt_.x();
+  int dy = endpt_.y() - startpt_.y();
+  if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
+    // Need to flip start/end.
+    ICOORD tmp = startpt_;
+    startpt_ = endpt_;
+    endpt_ = tmp;
+  }
+}
+
+// Setup the initial constraints, being the limits of
+// the vector and the extended ends.
+void TabVector::SetupConstraints() {
+  TabConstraint::CreateConstraint(this, false);
+  TabConstraint::CreateConstraint(this, true);
+}
+
+// Setup the constraints between the partners of this TabVector.
+void TabVector::SetupPartnerConstraints() {
+  // With the first and last partner, we want a common bottom and top,
+  // respectively, and for each change of partner, we want a common
+  // top of first with bottom of next.
+  TabVector_C_IT it(&partners_);
+  TabVector *prev_partner = nullptr;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector *partner = it.data();
+    if (partner->top_constraints_ == nullptr || partner->bottom_constraints_ == nullptr) {
+      partner->Print("Impossible: has no constraints");
+      Print("This vector has it as a partner");
+      continue;
+    }
+    if (prev_partner == nullptr) {
+      // This is the first partner, so common bottom.
+      if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) {
+        TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_);
+      }
+    } else {
+      // We need prev top to be common with partner bottom.
+      if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
+                                               partner->bottom_constraints_)) {
+        TabConstraint::MergeConstraints(prev_partner->top_constraints_,
+                                        partner->bottom_constraints_);
+      }
+    }
+    prev_partner = partner;
+    if (it.at_last()) {
+      // This is the last partner, so common top.
+      if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) {
+        TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_);
+      }
+    }
+  }
+}
+
+// Setup the constraints between this and its partner.
+void TabVector::SetupPartnerConstraints(TabVector *partner) {
+  if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) {
+    TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_);
+  }
+  if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) {
+    TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_);
+  }
+}
+
+// Use the constraints to modify the top and bottom.
+void TabVector::ApplyConstraints() {
+  if (top_constraints_ != nullptr) {
+    TabConstraint::ApplyConstraints(top_constraints_);
+  }
+  if (bottom_constraints_ != nullptr) {
+    TabConstraint::ApplyConstraints(bottom_constraints_);
+  }
+}
+
+// Merge close tab vectors of the same side that overlap.
+void TabVector::MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors,
+                                       BlobGrid *grid) {
+  TabVector_IT it1(vectors);
+  for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
+    TabVector *v1 = it1.data();
+    TabVector_IT it2(it1);
+    for (it2.forward(); !it2.at_first(); it2.forward()) {
+      TabVector *v2 = it2.data();
+      if (v2->SimilarTo(vertical, *v1, grid)) {
+        // Merge into the forward one, in case the combined vector now
+        // overlaps one in between.
+        if (textord_debug_tabfind) {
+          v2->Print("Merging");
+          v1->Print("by deleting");
+        }
+        v2->MergeWith(vertical, it1.extract());
+        if (textord_debug_tabfind) {
+          v2->Print("Producing");
+        }
+        ICOORD merged_vector = v2->endpt();
+        merged_vector -= v2->startpt();
+        if (textord_debug_tabfind && abs(merged_vector.x()) > 100) {
+          v2->Print("Garbage result of merge?");
+        }
+        break;
+      }
+    }
+  }
+}
+
+// Return true if this vector is the same side, overlaps, and close
+// enough to the other to be merged.
+bool TabVector::SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const {
+  if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) {
+    // If they don't overlap, at least in extensions, then there is no chance.
+    if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) {
+      return false;
+    }
+    // A fast approximation to the scale factor of the sort_key_.
+    int v_scale = abs(vertical.y());
+    if (v_scale == 0) {
+      v_scale = 1;
+    }
+    // If they are close enough, then OK.
+    if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
+        sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) {
+      return true;
+    }
+    // Ragged tabs get a bigger threshold.
+    if (!IsRagged() || !other.IsRagged() ||
+        sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
+        sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) {
+      return false;
+    }
+    if (grid == nullptr) {
+      // There is nothing else to test!
+      return true;
+    }
+    // If there is nothing in the rectangle between the vector that is going to
+    // move, and the place it is moving to, then they can be merged.
+    // Setup a vertical search for any blob.
+    const TabVector *mover = (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other;
+    int top_y = mover->endpt_.y();
+    int bottom_y = mover->startpt_.y();
+    int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y));
+    int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y));
+    int shift = abs(sort_key_ - other.sort_key_) / v_scale;
+    if (IsRightTab()) {
+      right += shift;
+    } else {
+      left -= shift;
+    }
+
+    GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
+    vsearch.StartVerticalSearch(left, right, top_y);
+    BLOBNBOX *blob;
+    while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) {
+      const TBOX &box = blob->bounding_box();
+      if (box.top() > bottom_y) {
+        return true; // Nothing found.
+      }
+      if (box.bottom() < top_y) {
+        continue; // Doesn't overlap.
+      }
+      int left_at_box = XAtY(box.bottom());
+      int right_at_box = left_at_box;
+      if (IsRightTab()) {
+        right_at_box += shift;
+      } else {
+        left_at_box -= shift;
+      }
+      if (std::min(right_at_box, static_cast<int>(box.right())) >
+          std::max(left_at_box, static_cast<int>(box.left()))) {
+        return false;
+      }
+    }
+    return true; // Nothing found.
+  }
+  return false;
+}
+
+// Eat the other TabVector into this and delete it.
+void TabVector::MergeWith(const ICOORD &vertical, TabVector *other) {
+  extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
+  extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
+  if (other->IsRagged()) {
+    alignment_ = other->alignment_;
+  }
+  // Merge sort the two lists of boxes.
+  BLOBNBOX_C_IT it1(&boxes_);
+  BLOBNBOX_C_IT it2(&other->boxes_);
+  while (!it2.empty()) {
+    BLOBNBOX *bbox2 = it2.extract();
+    it2.forward();
+    TBOX box2 = bbox2->bounding_box();
+    BLOBNBOX *bbox1 = it1.data();
+    TBOX box1 = bbox1->bounding_box();
+    while (box1.bottom() < box2.bottom() && !it1.at_last()) {
+      it1.forward();
+      bbox1 = it1.data();
+      box1 = bbox1->bounding_box();
+    }
+    if (box1.bottom() < box2.bottom()) {
+      it1.add_to_end(bbox2);
+    } else if (bbox1 != bbox2) {
+      it1.add_before_stay_put(bbox2);
+    }
+  }
+  Fit(vertical, true);
+  other->Delete(this);
+}
+
+// Add a new element to the list of partner TabVectors.
+// Partners must be added in order of increasing y coordinate of the text line
+// that makes them partners.
+// Groups of identical partners are merged into one.
+void TabVector::AddPartner(TabVector *partner) {
+  if (IsSeparator() || partner->IsSeparator()) {
+    return;
+  }
+  TabVector_C_IT it(&partners_);
+  if (!it.empty()) {
+    it.move_to_last();
+    if (it.data() == partner) {
+      return;
+    }
+  }
+  it.add_after_then_move(partner);
+}
+
+// Return true if other is a partner of this.
+bool TabVector::IsAPartner(const TabVector *other) {
+  TabVector_C_IT it(&partners_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    if (it.data() == other) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// These names must be synced with the TabAlignment enum in tabvector.h.
+static const char *const kAlignmentNames[] = {"Left Aligned",  "Left Ragged",  "Center",
+                                              "Right Aligned", "Right Ragged", "Separator"};
+
+// Print basic information about this tab vector.
+void TabVector::Print(const char *prefix) {
+  tprintf(
+      "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
+      " partners=%d\n",
+      prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
+      mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length());
+}
+
+// Print basic information about this tab vector and every box in it.
+void TabVector::Debug(const char *prefix) {
+  Print(prefix);
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX *bbox = it.data();
+    const TBOX &box = bbox->bounding_box();
+    tprintf("Box at (%d,%d)->(%d,%d)\n", box.left(), box.bottom(), box.right(), box.top());
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draw this tabvector in place in the given window.
+void TabVector::Display(ScrollView *tab_win) {
+  if (textord_debug_printable) {
+    tab_win->Pen(ScrollView::BLUE);
+  } else if (alignment_ == TA_LEFT_ALIGNED) {
+    tab_win->Pen(ScrollView::LIME_GREEN);
+  } else if (alignment_ == TA_LEFT_RAGGED) {
+    tab_win->Pen(ScrollView::DARK_GREEN);
+  } else if (alignment_ == TA_RIGHT_ALIGNED) {
+    tab_win->Pen(ScrollView::PINK);
+  } else if (alignment_ == TA_RIGHT_RAGGED) {
+    tab_win->Pen(ScrollView::CORAL);
+  } else {
+    tab_win->Pen(ScrollView::WHITE);
+  }
+  tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
+  tab_win->Pen(ScrollView::GREY);
+  tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
+  tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
+  auto score_string = std::to_string(percent_score_);
+  tab_win->TextAttributes("Times", 50, false, false, false);
+  tab_win->Text(startpt_.x(), startpt_.y(), score_string.c_str());
+}
+
+#endif
+
+// Refit the line and/or re-evaluate the vector if the dirty flags are set.
+void TabVector::FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder) {
+  if (needs_refit_) {
+    Fit(vertical, true);
+  }
+  if (needs_evaluation_) {
+    Evaluate(vertical, finder);
+  }
+}
+
+// Evaluate the vector in terms of coverage of its length by good-looking
+// box edges. A good looking box is one where its nearest neighbour on the
+// inside is nearer than half the distance its nearest neighbour on the
+// outside of the putative column. Bad boxes are removed from the line.
+// A second pass then further filters boxes by requiring that the gutter
+// width be a minimum fraction of the mean gutter along the line.
+void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) {
+  bool debug = false;
+  needs_evaluation_ = false;
+  int length = endpt_.y() - startpt_.y();
+  if (length == 0 || boxes_.empty()) {
+    percent_score_ = 0;
+    Print("Zero length in evaluate");
+    return;
+  }
+  // Compute the mean box height.
+  BLOBNBOX_C_IT it(&boxes_);
+  int mean_height = 0;
+  int height_count = 0;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX *bbox = it.data();
+    const TBOX &box = bbox->bounding_box();
+    int height = box.height();
+    mean_height += height;
+    ++height_count;
+  }
+  if (height_count > 0) {
+    mean_height /= height_count;
+  }
+  int max_gutter = kGutterMultiple * mean_height;
+  if (IsRagged()) {
+    // Ragged edges face a tougher test in that the gap must always be within
+    // the height of the blob.
+    max_gutter = kGutterToNeighbourRatio * mean_height;
+  }
+
+  STATS gutters(0, max_gutter);
+  // Evaluate the boxes for their goodness, calculating the coverage as we go.
+  // Remove boxes that are not good and shorten the list to the first and
+  // last good boxes.
+  int num_deleted_boxes = 0;
+  bool text_on_image = false;
+  int good_length = 0;
+  const TBOX *prev_good_box = nullptr;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX *bbox = it.data();
+    const TBOX &box = bbox->bounding_box();
+    int mid_y = (box.top() + box.bottom()) / 2;
+    if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
+      if (!debug) {
+        tprintf("After already deleting %d boxes, ", num_deleted_boxes);
+        Print("Starting evaluation");
+      }
+      debug = true;
+    }
+    // A good box is one where the nearest neighbour on the inside is closer
+    // than half the distance to the nearest neighbour on the outside
+    // (of the putative column).
+    bool left = IsLeftTab();
+    int tab_x = XAtY(mid_y);
+    int gutter_width;
+    int neighbour_gap;
+    finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width,
+                                       &neighbour_gap);
+    if (debug) {
+      tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", box.left(), box.bottom(),
+              box.right(), box.top(), gutter_width, neighbour_gap);
+    }
+    // Now we can make the test.
+    if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
+      // A good box contributes its height to the good_length.
+      good_length += box.top() - box.bottom();
+      gutters.add(gutter_width, 1);
+      // Two good boxes together contribute the gap between them
+      // to the good_length as well, as long as the gap is not
+      // too big.
+      if (prev_good_box != nullptr) {
+        int vertical_gap = box.bottom() - prev_good_box->top();
+        double size1 = sqrt(static_cast<double>(prev_good_box->area()));
+        double size2 = sqrt(static_cast<double>(box.area()));
+        if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2)) {
+          good_length += vertical_gap;
+        }
+        if (debug) {
+          tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", vertical_gap,
+                  kMaxFillinMultiple * std::min(size1, size2), good_length);
+        }
+      } else {
+        // Adjust the start to the first good box.
+        SetYStart(box.bottom());
+      }
+      prev_good_box = &box;
+      if (bbox->flow() == BTFT_TEXT_ON_IMAGE) {
+        text_on_image = true;
+      }
+    } else {
+      // Get rid of boxes that are not good.
+      if (debug) {
+        tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", box.left(), box.bottom(),
+                box.right(), box.top(), gutter_width, neighbour_gap);
+      }
+      it.extract();
+      ++num_deleted_boxes;
+    }
+  }
+  if (debug) {
+    Print("Evaluating:");
+  }
+  // If there are any good boxes, do it again, except this time get rid of
+  // boxes that have a gutter that is a small fraction of the mean gutter.
+  // This filters out ends that run into a coincidental gap in the text.
+  int search_top = endpt_.y();
+  int search_bottom = startpt_.y();
+  int median_gutter = IntCastRounded(gutters.median());
+  if (gutters.get_total() > 0) {
+    prev_good_box = nullptr;
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX *bbox = it.data();
+      const TBOX &box = bbox->bounding_box();
+      int mid_y = (box.top() + box.bottom()) / 2;
+      // A good box is one where the gutter width is at least some constant
+      // fraction of the mean gutter width.
+      bool left = IsLeftTab();
+      int tab_x = XAtY(mid_y);
+      int max_gutter = kGutterMultiple * mean_height;
+      if (IsRagged()) {
+        // Ragged edges face a tougher test in that the gap must always be
+        // within the height of the blob.
+        max_gutter = kGutterToNeighbourRatio * mean_height;
+      }
+      int gutter_width;
+      int neighbour_gap;
+      finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width,
+                                         &neighbour_gap);
+      // Now we can make the test.
+      if (gutter_width >= median_gutter * kMinGutterFraction) {
+        if (prev_good_box == nullptr) {
+          // Adjust the start to the first good box.
+          SetYStart(box.bottom());
+          search_bottom = box.top();
+        }
+        prev_good_box = &box;
+        search_top = box.bottom();
+      } else {
+        // Get rid of boxes that are not good.
+        if (debug) {
+          tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", box.left(),
+                  box.bottom(), box.right(), box.top(), gutter_width, median_gutter);
+        }
+        it.extract();
+        ++num_deleted_boxes;
+      }
+    }
+  }
+  // If there has been a good box, adjust the end.
+  if (prev_good_box != nullptr) {
+    SetYEnd(prev_good_box->top());
+    // Compute the percentage of the vector that is occupied by good boxes.
+    int length = endpt_.y() - startpt_.y();
+    percent_score_ = 100 * good_length / length;
+    if (num_deleted_boxes > 0) {
+      needs_refit_ = true;
+      FitAndEvaluateIfNeeded(vertical, finder);
+      if (boxes_.empty()) {
+        return;
+      }
+    }
+    // Test the gutter over the whole vector, instead of just at the boxes.
+    int required_shift;
+    if (search_bottom > search_top) {
+      search_bottom = startpt_.y();
+      search_top = endpt_.y();
+    }
+    double min_gutter_width = kLineCountReciprocal / boxes_.length();
+    min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
+    min_gutter_width *= mean_height;
+    int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
+    if (median_gutter > max_gutter_width) {
+      max_gutter_width = median_gutter;
+    }
+    int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, text_on_image,
+                                           max_gutter_width, &required_shift);
+    if (gutter_width < min_gutter_width) {
+      if (debug) {
+        tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", gutter_width,
+                min_gutter_width);
+      }
+      boxes_.shallow_clear();
+      percent_score_ = 0;
+    } else if (debug) {
+      tprintf("Final gutter %d, vs limit of %g, required shift = %d\n", gutter_width,
+              min_gutter_width, required_shift);
+    }
+  } else {
+    // There are no good boxes left, so score is 0.
+    percent_score_ = 0;
+  }
+
+  if (debug) {
+    Print("Evaluation complete:");
+  }
+}
+
+// (Re)Fit a line to the stored points. Returns false if the line
+// is degenerate. Although the TabVector code mostly doesn't care about the
+// direction of lines, XAtY would give silly results for a horizontal line.
+// The class is mostly aimed at use for vertical lines representing
+// horizontal tab stops.
+bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
+  needs_refit_ = false;
+  if (boxes_.empty()) {
+    // Don't refit something with no boxes, as that only happens
+    // in Evaluate, and we don't want to end up with a zero vector.
+    if (!force_parallel) {
+      return false;
+    }
+    // If we are forcing parallel, then we just need to set the sort_key_.
+    ICOORD midpt = startpt_;
+    midpt += endpt_;
+    midpt /= 2;
+    sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
+    return startpt_.y() != endpt_.y();
+  }
+  if (!force_parallel && !IsRagged()) {
+    // Use a fitted line as the vertical.
+    DetLineFit linepoints;
+    BLOBNBOX_C_IT it(&boxes_);
+    // Fit a line to all the boxes in the list.
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX *bbox = it.data();
+      const TBOX &box = bbox->bounding_box();
+      int x1 = IsRightTab() ? box.right() : box.left();
+      ICOORD boxpt(x1, box.bottom());
+      linepoints.Add(boxpt);
+      if (it.at_last()) {
+        ICOORD top_pt(x1, box.top());
+        linepoints.Add(top_pt);
+      }
+    }
+    linepoints.Fit(&startpt_, &endpt_);
+    if (startpt_.y() != endpt_.y()) {
+      vertical = endpt_;
+      vertical -= startpt_;
+    }
+  }
+  int start_y = startpt_.y();
+  int end_y = endpt_.y();
+  sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX;
+  BLOBNBOX_C_IT it(&boxes_);
+  // Choose a line parallel to the vertical such that all boxes are on the
+  // correct side of it.
+  mean_width_ = 0;
+  int width_count = 0;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX *bbox = it.data();
+    const TBOX &box = bbox->bounding_box();
+    mean_width_ += box.width();
+    ++width_count;
+    int x1 = IsRightTab() ? box.right() : box.left();
+    // Test both the bottom and the top, as one will be more extreme, depending
+    // on the direction of skew.
+    int bottom_y = box.bottom();
+    int top_y = box.top();
+    int key = SortKey(vertical, x1, bottom_y);
+    if (IsLeftTab() == (key < sort_key_)) {
+      sort_key_ = key;
+      startpt_ = ICOORD(x1, bottom_y);
+    }
+    key = SortKey(vertical, x1, top_y);
+    if (IsLeftTab() == (key < sort_key_)) {
+      sort_key_ = key;
+      startpt_ = ICOORD(x1, top_y);
+    }
+    if (it.at_first()) {
+      start_y = bottom_y;
+    }
+    if (it.at_last()) {
+      end_y = top_y;
+    }
+  }
+  if (width_count > 0) {
+    mean_width_ = (mean_width_ + width_count - 1) / width_count;
+  }
+  endpt_ = startpt_ + vertical;
+  needs_evaluation_ = true;
+  if (start_y != end_y) {
+    // Set the ends of the vector to fully include the first and last blobs.
+    startpt_.set_x(XAtY(vertical, sort_key_, start_y));
+    startpt_.set_y(start_y);
+    endpt_.set_x(XAtY(vertical, sort_key_, end_y));
+    endpt_.set_y(end_y);
+    return true;
+  }
+  return false;
+}
+
+// Returns the singleton partner if there is one, or nullptr otherwise.
+TabVector *TabVector::GetSinglePartner() {
+  if (!partners_.singleton()) {
+    return nullptr;
+  }
+  TabVector_C_IT partner_it(&partners_);
+  TabVector *partner = partner_it.data();
+  return partner;
+}
+
+// Return the partner of this TabVector if the vector qualifies as
+// being a vertical text line, otherwise nullptr.
+TabVector *TabVector::VerticalTextlinePartner() {
+  if (!partners_.singleton()) {
+    return nullptr;
+  }
+  TabVector_C_IT partner_it(&partners_);
+  TabVector *partner = partner_it.data();
+  BLOBNBOX_C_IT box_it1(&boxes_);
+  BLOBNBOX_C_IT box_it2(&partner->boxes_);
+  // Count how many boxes are also in the other list.
+  // At the same time, gather the mean width and median vertical gap.
+  if (textord_debug_tabfind > 1) {
+    Print("Testing for vertical text");
+    partner->Print("           partner");
+  }
+  int num_matched = 0;
+  int num_unmatched = 0;
+  int total_widths = 0;
+  int width = startpt().x() - partner->startpt().x();
+  if (width < 0) {
+    width = -width;
+  }
+  STATS gaps(0, width * 2 - 1);
+  BLOBNBOX *prev_bbox = nullptr;
+  box_it2.mark_cycle_pt();
+  for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
+    BLOBNBOX *bbox = box_it1.data();
+    TBOX box = bbox->bounding_box();
+    if (prev_bbox != nullptr) {
+      gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
+    }
+    while (!box_it2.cycled_list() && box_it2.data() != bbox &&
+           box_it2.data()->bounding_box().bottom() < box.bottom()) {
+      box_it2.forward();
+    }
+    if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN &&
+        (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN)) {
+      ++num_matched;
+    } else {
+      ++num_unmatched;
+    }
+    total_widths += box.width();
+    prev_bbox = bbox;
+  }
+  if (num_unmatched + num_matched == 0) {
+    return nullptr;
+  }
+  double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
+  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
+  int min_box_match =
+      static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio);
+  bool is_vertical =
+      (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap);
+  if (textord_debug_tabfind > 1) {
+    tprintf(
+        "gaps=%d, matched=%d, unmatched=%d, min_match=%d "
+        "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
+        gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width,
+        max_gap, is_vertical ? "Yes" : "No");
+  }
+  return (is_vertical) ? partner : nullptr;
+}
+
+// The constructor is private.
+TabVector::TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment,
+                     BLOBNBOX_CLIST *boxes)
+    : extended_ymin_(extended_ymin)
+    , extended_ymax_(extended_ymax)
+    , sort_key_(0)
+    , percent_score_(0)
+    , mean_width_(0)
+    , needs_refit_(true)
+    , needs_evaluation_(true)
+    , alignment_(alignment)
+    , top_constraints_(nullptr)
+    , bottom_constraints_(nullptr) {
+  BLOBNBOX_C_IT it(&boxes_);
+  it.add_list_after(boxes);
+}
+
+// Delete this, but first, repoint all the partners to point to
+// replacement. If replacement is nullptr, then partner relationships
+// are removed.
+void TabVector::Delete(TabVector *replacement) {
+  TabVector_C_IT it(&partners_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector *partner = it.data();
+    TabVector_C_IT p_it(&partner->partners_);
+    // If partner already has replacement in its list, then make
+    // replacement null, and just remove this TabVector when we find it.
+    TabVector *partner_replacement = replacement;
+    for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
+      TabVector *p_partner = p_it.data();
+      if (p_partner == partner_replacement) {
+        partner_replacement = nullptr;
+        break;
+      }
+    }
+    // Remove all references to this, and replace with replacement if not
+    // nullptr.
+    for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
+      TabVector *p_partner = p_it.data();
+      if (p_partner == this) {
+        p_it.extract();
+        if (partner_replacement != nullptr) {
+          p_it.add_before_stay_put(partner_replacement);
+        }
+      }
+    }
+    if (partner_replacement != nullptr) {
+      partner_replacement->AddPartner(partner);
+    }
+  }
+  delete this;
+}
+
+} // namespace tesseract.