diff mupdf-source/thirdparty/tesseract/src/textord/tablefind.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/textord/tablefind.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,2135 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tablefind.cpp
+// Description: Helper classes to find tables from ColPartitions.
+// Author:      Faisal Shafait (faisal.shafait@dfki.de)
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include "tablefind.h"
+
+#include <allheaders.h>
+
+#include "colpartitionset.h"
+#include "tablerecog.h"
+
+namespace tesseract {
+
+// These numbers are used to calculate the global median stats.
+// They just set an upper bound on the stats objects.
+// Maximum vertical spacing between neighbor partitions.
+const int kMaxVerticalSpacing = 500;
+// Maximum width of a blob in a partition.
+const int kMaxBlobWidth = 500;
+
+// Minimum whitespace size to split a partition (measured as a multiple
+// of a partition's median width).
+const double kSplitPartitionSize = 2.0;
+// To insert text, the partition must satisfy these size constraints
+// in AllowTextPartition(). The idea is to filter noise partitions
+// determined by the size compared to the global medians.
+// TODO(nbeato): Need to find good numbers again.
+const double kAllowTextHeight = 0.5;
+const double kAllowTextWidth = 0.6;
+const double kAllowTextArea = 0.8;
+// The same thing applies to blobs (to filter noise).
+// TODO(nbeato): These numbers are a shot in the dark...
+// height and width are 0.5 * gridsize() in colfind.cpp
+// area is a rough guess for the size of a period.
+const double kAllowBlobHeight = 0.3;
+const double kAllowBlobWidth = 0.4;
+const double kAllowBlobArea = 0.05;
+
+// Minimum number of components in a text partition. A partition having fewer
+// components than that is more likely a data partition and is a candidate
+// table cell.
+const int kMinBoxesInTextPartition = 10;
+
+// Maximum number of components that a data partition can have
+const int kMaxBoxesInDataPartition = 20;
+
+// Maximum allowed gap in a text partitions as a multiple of its median size.
+const double kMaxGapInTextPartition = 4.0;
+
+// Minimum value that the maximum gap in a text partition should have as a
+// factor of its median size.
+const double kMinMaxGapInTextPartition = 0.5;
+
+// The amount of overlap that is "normal" for adjacent blobs in a text
+// partition. This is used to calculate gap between overlapping blobs.
+const double kMaxBlobOverlapFactor = 4.0;
+
+// Maximum x-height a table partition can have as a multiple of global
+// median x-height
+const double kMaxTableCellXheight = 2.0;
+
+// Maximum line spacing between a table column header and column contents
+// for merging the two (as a multiple of the partition's median_height).
+const int kMaxColumnHeaderDistance = 4;
+
+// Minimum ratio of num_table_partitions to num_text_partitions in a column
+// block to be called it a table column
+const double kTableColumnThreshold = 3.0;
+
+// Search for horizontal ruling lines within the vertical margin as a
+// multiple of grid size
+// const int kRulingVerticalMargin = 3;
+
+// Minimum overlap that a colpartition must have with a table region
+// to become part of that table
+const double kMinOverlapWithTable = 0.6;
+
+// Maximum side space (distance from column boundary) that a typical
+// text-line in flowing text should have as a multiple of its x-height
+// (Median size).
+const int kSideSpaceMargin = 10;
+
+// Fraction of the peak of x-projection of a table region to set the
+// threshold for the x-projection histogram
+const double kSmallTableProjectionThreshold = 0.35;
+const double kLargeTableProjectionThreshold = 0.45;
+// Minimum number of rows required to look for more rows in the projection.
+const int kLargeTableRowCount = 6;
+
+// Minimum number of rows in a table
+const int kMinRowsInTable = 3;
+
+// The amount of padding (multiplied by global_median_xheight_ during use)
+// that is vertically added to the search adjacent leader search during
+// ColPartition marking.
+const int kAdjacentLeaderSearchPadding = 2;
+
+// Used when filtering false positives. When finding the last line
+// of a paragraph (typically left-aligned), the previous line should have
+// its center to the right of the last line by this scaled amount.
+const double kParagraphEndingPreviousLineRatio = 1.3;
+
+// The maximum amount of whitespace allowed left of a paragraph ending.
+// Do not filter a ColPartition with more than this space left of it.
+const double kMaxParagraphEndingLeftSpaceMultiple = 3.0;
+
+// Used when filtering false positives. The last line of a paragraph
+// should be preceded by a line that is predominantly text. This is the
+// ratio of text to whitespace (to the right of the text) that is required
+// for the previous line to be a text.
+const double kMinParagraphEndingTextToWhitespaceRatio = 3.0;
+
+// When counting table columns, this is the required gap between two columns
+// (it is multiplied by global_median_xheight_).
+const double kMaxXProjectionGapFactor = 2.0;
+
+// Used for similarity in partitions using stroke width. Values copied
+// from ColFind.cpp in Ray's CL.
+const double kStrokeWidthFractionalTolerance = 0.25;
+const double kStrokeWidthConstantTolerance = 2.0;
+
+#ifndef GRAPHICS_DISABLED
+static BOOL_VAR(textord_show_tables, false, "Show table regions (ScrollView)");
+static BOOL_VAR(textord_tablefind_show_mark, false,
+                "Debug table marking steps in detail (ScrollView)");
+static BOOL_VAR(textord_tablefind_show_stats, false,
+                "Show page stats used in table finding (ScrollView)");
+#endif
+static BOOL_VAR(textord_tablefind_recognize_tables, false,
+                "Enables the table recognizer for table layout and filtering.");
+
+// Templated helper function used to create destructor callbacks for the
+// BBGrid::ClearGridData() method.
+template <typename T>
+void DeleteObject(T *object) {
+  delete object;
+}
+
+TableFinder::TableFinder()
+    : resolution_(0),
+      global_median_xheight_(0),
+      global_median_blob_width_(0),
+      global_median_ledding_(0),
+      left_to_right_language_(true) {}
+
+TableFinder::~TableFinder() {
+  // ColPartitions and ColSegments created by this class for storage in grids
+  // need to be deleted explicitly.
+  clean_part_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  leader_and_ruling_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  fragmented_text_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  col_seg_grid_.ClearGridData(&DeleteObject<ColSegment>);
+  table_grid_.ClearGridData(&DeleteObject<ColSegment>);
+}
+
+void TableFinder::set_left_to_right_language(bool order) {
+  left_to_right_language_ = order;
+}
+
+void TableFinder::Init(int grid_size, const ICOORD &bottom_left,
+                       const ICOORD &top_right) {
+  // Initialize clean partitions list and grid
+  clean_part_grid_.Init(grid_size, bottom_left, top_right);
+  leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
+  fragmented_text_grid_.Init(grid_size, bottom_left, top_right);
+  col_seg_grid_.Init(grid_size, bottom_left, top_right);
+  table_grid_.Init(grid_size, bottom_left, top_right);
+}
+
+// Copy cleaned partitions from part_grid_ to clean_part_grid_ and
+// insert leaders and rulers into the leader_and_ruling_grid_
+void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid,
+                                        TO_BLOCK *block) {
+  // Calculate stats. This lets us filter partitions in AllowTextPartition()
+  // and filter blobs in AllowBlob().
+  SetGlobalSpacings(grid);
+
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    // Reject partitions with nothing useful inside of them.
+    if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0) {
+      continue;
+    }
+    ColPartition *clean_part = part->ShallowCopy();
+    ColPartition *leader_part = nullptr;
+    if (part->IsLineType()) {
+      InsertRulingPartition(clean_part);
+      continue;
+    }
+    // Insert all non-text partitions to clean_parts
+    if (!part->IsTextType()) {
+      InsertImagePartition(clean_part);
+      continue;
+    }
+    // Insert text colpartitions after removing noisy components from them
+    // The leaders are split into a separate grid.
+    BLOBNBOX_CLIST *part_boxes = part->boxes();
+    BLOBNBOX_C_IT pit(part_boxes);
+    for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
+      BLOBNBOX *pblob = pit.data();
+      // Bad blobs... happens in UNLV set.
+      // news.3G1, page 17 (around x=6)
+      if (!AllowBlob(*pblob)) {
+        continue;
+      }
+      if (pblob->flow() == BTFT_LEADER) {
+        if (leader_part == nullptr) {
+          leader_part = part->ShallowCopy();
+          leader_part->set_flow(BTFT_LEADER);
+        }
+        leader_part->AddBox(pblob);
+      } else if (pblob->region_type() != BRT_NOISE) {
+        clean_part->AddBox(pblob);
+      }
+    }
+    clean_part->ComputeLimits();
+    ColPartition *fragmented = clean_part->CopyButDontOwnBlobs();
+    InsertTextPartition(clean_part);
+    SplitAndInsertFragmentedTextPartition(fragmented);
+    if (leader_part != nullptr) {
+      // TODO(nbeato): Note that ComputeLimits does not update the column
+      // information. So the leader may appear to span more columns than it
+      // really does later on when IsInSameColumnAs gets called to test
+      // for adjacent leaders.
+      leader_part->ComputeLimits();
+      InsertLeaderPartition(leader_part);
+    }
+  }
+
+  // Make the partition partners better for upper and lower neighbors.
+  clean_part_grid_.FindPartitionPartners();
+  clean_part_grid_.RefinePartitionPartners(false);
+}
+
+// High level function to perform table detection
+void TableFinder::LocateTables(ColPartitionGrid *grid,
+                               ColPartitionSet **all_columns,
+                               WidthCallback width_cb, const FCOORD &reskew) {
+  // initialize spacing, neighbors, and columns
+  InitializePartitions(all_columns);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_tables) {
+    ScrollView *table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+    DisplayColPartitionConnections(table_win, &clean_part_grid_,
+                                   ScrollView::ORANGE);
+
+    table_win = MakeWindow(100, 300, "Fragmented Text");
+    DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE);
+  }
+#endif // !GRAPHICS_DISABLED
+
+  // mark, filter, and smooth candidate table partitions
+  MarkTablePartitions();
+
+  // Make single-column blocks from good_columns_ partitions. col_segments are
+  // moved to a grid later which takes the ownership
+  ColSegment_LIST column_blocks;
+  GetColumnBlocks(all_columns, &column_blocks);
+  // Set the ratio of candidate table partitions in each column
+  SetColumnsType(&column_blocks);
+
+  // Move column segments to col_seg_grid_
+  MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_);
+
+  // Detect split in column layout that might have occurred due to the
+  // presence of a table. In such a case, merge the corresponding columns.
+  GridMergeColumnBlocks();
+
+  // Group horizontally overlapping table partitions into table columns.
+  // table_columns created here get deleted at the end of this method.
+  ColSegment_LIST table_columns;
+  GetTableColumns(&table_columns);
+
+  // Within each column, mark the range table regions occupy based on the
+  // table columns detected. table_regions are moved to a grid later which
+  // takes the ownership
+  ColSegment_LIST table_regions;
+  GetTableRegions(&table_columns, &table_regions);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    ScrollView *table_win = MakeWindow(1200, 300, "Table Columns and Regions");
+    DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE);
+    DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW);
+  }
+#endif // !GRAPHICS_DISABLED
+
+  // Merge table regions across columns for tables spanning multiple
+  // columns
+  MoveColSegmentsToGrid(&table_regions, &table_grid_);
+  GridMergeTableRegions();
+
+  // Adjust table boundaries by including nearby horizontal lines and left
+  // out column headers
+  AdjustTableBoundaries();
+  GridMergeTableRegions();
+
+  if (textord_tablefind_recognize_tables) {
+    // Remove false alarms consisting of a single column
+    DeleteSingleColumnTables();
+
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      ScrollView *table_win = MakeWindow(1200, 300, "Detected Table Locations");
+      DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+      DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI);
+      table_grid_.DisplayBoxes(table_win);
+    }
+#endif // !GRAPHICS_DISABLED
+
+    // Find table grid structure and reject tables that are malformed.
+    RecognizeTables();
+    GridMergeTableRegions();
+    RecognizeTables();
+
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      ScrollView *table_win = MakeWindow(1400, 600, "Recognized Tables");
+      DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE,
+                           ScrollView::BLUE);
+      table_grid_.DisplayBoxes(table_win);
+    }
+#endif // !GRAPHICS_DISABLED
+  } else {
+    // Remove false alarms consisting of a single column
+    // TODO(nbeato): verify this is a NOP after structured table rejection.
+    // Right now it isn't. If the recognize function is doing what it is
+    // supposed to do, this function is obsolete.
+    DeleteSingleColumnTables();
+
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      ScrollView *table_win = MakeWindow(1500, 300, "Detected Tables");
+      DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE,
+                           ScrollView::BLUE);
+      table_grid_.DisplayBoxes(table_win);
+    }
+#endif // !GRAPHICS_DISABLED
+  }
+
+  // Merge all colpartitions in table regions to make them a single
+  // colpartition and revert types of isolated table cells not
+  // assigned to any table to their original types.
+  MakeTableBlocks(grid, all_columns, width_cb);
+}
+// All grids have the same dimensions. The clean_part_grid_ sizes are set from
+// the part_grid_ that is passed to InsertCleanPartitions, which was the same as
+// the grid that is the base of ColumnFinder. Just return the clean_part_grid_
+// dimensions instead of duplicated memory.
+int TableFinder::gridsize() const {
+  return clean_part_grid_.gridsize();
+}
+int TableFinder::gridwidth() const {
+  return clean_part_grid_.gridwidth();
+}
+int TableFinder::gridheight() const {
+  return clean_part_grid_.gridheight();
+}
+const ICOORD &TableFinder::bleft() const {
+  return clean_part_grid_.bleft();
+}
+const ICOORD &TableFinder::tright() const {
+  return clean_part_grid_.tright();
+}
+
+void TableFinder::InsertTextPartition(ColPartition *part) {
+  ASSERT_HOST(part != nullptr);
+  if (AllowTextPartition(*part)) {
+    clean_part_grid_.InsertBBox(true, true, part);
+  } else {
+    delete part;
+  }
+}
+void TableFinder::InsertFragmentedTextPartition(ColPartition *part) {
+  ASSERT_HOST(part != nullptr);
+  if (AllowTextPartition(*part)) {
+    fragmented_text_grid_.InsertBBox(true, true, part);
+  } else {
+    delete part;
+  }
+}
+void TableFinder::InsertLeaderPartition(ColPartition *part) {
+  ASSERT_HOST(part != nullptr);
+  if (!part->IsEmpty() && part->bounding_box().area() > 0) {
+    leader_and_ruling_grid_.InsertBBox(true, true, part);
+  } else {
+    delete part;
+  }
+}
+void TableFinder::InsertRulingPartition(ColPartition *part) {
+  leader_and_ruling_grid_.InsertBBox(true, true, part);
+}
+void TableFinder::InsertImagePartition(ColPartition *part) {
+  // NOTE: If images are placed into a different grid in the future,
+  // the function SetPartitionSpacings needs to be updated. It should
+  // be the only thing that cares about image partitions.
+  clean_part_grid_.InsertBBox(true, true, part);
+}
+
+// Splits a partition into its "words". The splits happen
+// at locations with wide inter-blob spacing. This is useful
+// because it allows the table recognize to "cut through" the
+// text lines on the page. The assumption is that a table
+// will have several lines with similar overlapping whitespace
+// whereas text will not have this type of property.
+// Note: The code assumes that blobs are sorted by the left side x!
+// This will not work (as well) if the blobs are sorted by center/right.
+void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition *part) {
+  ASSERT_HOST(part != nullptr);
+  // Bye bye empty partitions!
+  if (part->boxes()->empty()) {
+    delete part;
+    return;
+  }
+
+  // The AllowBlob function prevents this.
+  ASSERT_HOST(part->median_width() > 0);
+  const double kThreshold = part->median_width() * kSplitPartitionSize;
+
+  ColPartition *right_part = part;
+  bool found_split = true;
+  while (found_split) {
+    found_split = false;
+    BLOBNBOX_C_IT box_it(right_part->boxes());
+    // Blobs are sorted left side first. If blobs overlap,
+    // the previous blob may have a "more right" right side.
+    // Account for this by always keeping the largest "right"
+    // so far.
+    int previous_right = INT32_MIN;
+
+    // Look for the next split in the partition.
+    for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+      const TBOX &box = box_it.data()->bounding_box();
+      if (previous_right != INT32_MIN &&
+          box.left() - previous_right > kThreshold) {
+        // We have a split position. Split the partition in two pieces.
+        // Insert the left piece in the grid and keep processing the right.
+        int mid_x = (box.left() + previous_right) / 2;
+        ColPartition *left_part = right_part;
+        right_part = left_part->SplitAt(mid_x);
+
+        InsertFragmentedTextPartition(left_part);
+        found_split = true;
+        break;
+      }
+
+      // The right side of the previous blobs.
+      previous_right = std::max(previous_right, static_cast<int>(box.right()));
+    }
+  }
+  // When a split is not found, the right part is minimized
+  // as much as possible, so process it.
+  InsertFragmentedTextPartition(right_part);
+}
+
+// Some simple criteria to filter out now. We want to make sure the
+// average blob size in the partition is consistent with the
+// global page stats.
+// The area metric will almost always pass for multi-blob partitions.
+// It is useful when filtering out noise caused by an isolated blob.
+bool TableFinder::AllowTextPartition(const ColPartition &part) const {
+  const double kHeightRequired = global_median_xheight_ * kAllowTextHeight;
+  const double kWidthRequired = global_median_blob_width_ * kAllowTextWidth;
+  const int median_area = global_median_xheight_ * global_median_blob_width_;
+  const double kAreaPerBlobRequired = median_area * kAllowTextArea;
+  // Keep comparisons strictly greater to disallow 0!
+  return part.median_height() > kHeightRequired &&
+         part.median_width() > kWidthRequired &&
+         part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
+}
+
+// Same as above, applied to blobs. Keep in mind that
+// leaders, commas, and periods are important in tables.
+bool TableFinder::AllowBlob(const BLOBNBOX &blob) const {
+  const TBOX &box = blob.bounding_box();
+  const double kHeightRequired = global_median_xheight_ * kAllowBlobHeight;
+  const double kWidthRequired = global_median_blob_width_ * kAllowBlobWidth;
+  const int median_area = global_median_xheight_ * global_median_blob_width_;
+  const double kAreaRequired = median_area * kAllowBlobArea;
+  // Keep comparisons strictly greater to disallow 0!
+  return box.height() > kHeightRequired && box.width() > kWidthRequired &&
+         box.area() > kAreaRequired;
+}
+
+// TODO(nbeato): The grid that makes the window doesn't seem to matter.
+// The only downside is that window messages will be caught by
+// clean_part_grid_ instead of a useful object. This is a temporary solution
+// for the debug windows created by the TableFinder.
+#ifndef GRAPHICS_DISABLED
+ScrollView *TableFinder::MakeWindow(int x, int y, const char *window_name) {
+  return clean_part_grid_.MakeWindow(x, y, window_name);
+}
+#endif
+
+// Make single-column blocks from good_columns_ partitions.
+void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns,
+                                  ColSegment_LIST *column_blocks) {
+  for (int i = 0; i < gridheight(); ++i) {
+    ColPartitionSet *columns = all_columns[i];
+    if (columns != nullptr) {
+      ColSegment_LIST new_blocks;
+      // Get boxes from the current vertical position on the grid
+      columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(),
+                              &new_blocks);
+      // Merge the new_blocks boxes into column_blocks if they are well-aligned
+      GroupColumnBlocks(&new_blocks, column_blocks);
+    }
+  }
+}
+
+// Merge column segments into the current list if they are well aligned.
+void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks,
+                                    ColSegment_LIST *column_blocks) {
+  ColSegment_IT src_it(new_blocks);
+  ColSegment_IT dest_it(column_blocks);
+  // iterate through the source list
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    ColSegment *src_seg = src_it.data();
+    const TBOX &src_box = src_seg->bounding_box();
+    bool match_found = false;
+    // iterate through the destination list to find a matching column block
+    for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
+      ColSegment *dest_seg = dest_it.data();
+      TBOX dest_box = dest_seg->bounding_box();
+      if (ConsecutiveBoxes(src_box, dest_box)) {
+        // If matching block is found, insert the current block into it
+        // and delete the source block.
+        dest_seg->InsertBox(src_box);
+        match_found = true;
+        delete src_it.extract();
+        break;
+      }
+    }
+    // If no match is found, just append the source block to column_blocks
+    if (!match_found) {
+      dest_it.add_after_then_move(src_it.extract());
+    }
+  }
+}
+
+// are the two boxes immediate neighbors along the vertical direction
+bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) {
+  int x_margin = 20;
+  int y_margin = 5;
+  return (abs(b1.left() - b2.left()) < x_margin) &&
+         (abs(b1.right() - b2.right()) < x_margin) &&
+         (abs(b1.top() - b2.bottom()) < y_margin ||
+          abs(b2.top() - b1.bottom()) < y_margin);
+}
+
+// Set up info for clean_part_grid_ partitions to be valid during detection
+// code.
+void TableFinder::InitializePartitions(ColPartitionSet **all_columns) {
+  FindNeighbors();
+  SetPartitionSpacings(&clean_part_grid_, all_columns);
+  SetGlobalSpacings(&clean_part_grid_);
+}
+
+// Set left, right and top, bottom spacings of each colpartition.
+void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid,
+                                       ColPartitionSet **all_columns) {
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    ColPartitionSet *columns = all_columns[gsearch.GridY()];
+    TBOX box = part->bounding_box();
+    int y = part->MidY();
+    ColPartition *left_column = columns->ColumnContaining(box.left(), y);
+    ColPartition *right_column = columns->ColumnContaining(box.right(), y);
+    // set distance from left column as space to the left
+    if (left_column) {
+      int left_space = std::max(0, box.left() - left_column->LeftAtY(y));
+      part->set_space_to_left(left_space);
+    }
+    // set distance from right column as space to the right
+    if (right_column) {
+      int right_space = std::max(0, right_column->RightAtY(y) - box.right());
+      part->set_space_to_right(right_space);
+    }
+
+    // Look for images that may be closer.
+    // NOTE: used to be part_grid_, might cause issues now
+    ColPartitionGridSearch hsearch(grid);
+    hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
+    ColPartition *neighbor = nullptr;
+    while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) {
+      if (neighbor->type() == PT_PULLOUT_IMAGE ||
+          neighbor->type() == PT_FLOWING_IMAGE ||
+          neighbor->type() == PT_HEADING_IMAGE) {
+        int right = neighbor->bounding_box().right();
+        if (right < box.left()) {
+          int space = std::min(box.left() - right, part->space_to_left());
+          part->set_space_to_left(space);
+        }
+      }
+    }
+    hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
+    neighbor = nullptr;
+    while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) {
+      if (neighbor->type() == PT_PULLOUT_IMAGE ||
+          neighbor->type() == PT_FLOWING_IMAGE ||
+          neighbor->type() == PT_HEADING_IMAGE) {
+        int left = neighbor->bounding_box().left();
+        if (left > box.right()) {
+          int space = std::min(left - box.right(), part->space_to_right());
+          part->set_space_to_right(space);
+        }
+      }
+    }
+
+    ColPartition *upper_part = part->SingletonPartner(true);
+    if (upper_part) {
+      int space =
+          std::max(0, static_cast<int>(upper_part->bounding_box().bottom() -
+                                       part->bounding_box().bottom()));
+      part->set_space_above(space);
+    } else {
+      // TODO(nbeato): What constitutes a good value?
+      // 0 is the default value when not set, explicitly noting it needs to
+      // be something else.
+      part->set_space_above(INT32_MAX);
+    }
+
+    ColPartition *lower_part = part->SingletonPartner(false);
+    if (lower_part) {
+      int space =
+          std::max(0, static_cast<int>(part->bounding_box().bottom() -
+                                       lower_part->bounding_box().bottom()));
+      part->set_space_below(space);
+    } else {
+      // TODO(nbeato): What constitutes a good value?
+      // 0 is the default value when not set, explicitly noting it needs to
+      // be something else.
+      part->set_space_below(INT32_MAX);
+    }
+  }
+}
+
+// Set spacing and closest neighbors above and below a given colpartition.
+void TableFinder::SetVerticalSpacing(ColPartition *part) {
+  TBOX box = part->bounding_box();
+  int top_range =
+      std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y()));
+  int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing,
+                              static_cast<int>(bleft().y()));
+  box.set_top(top_range);
+  box.set_bottom(bottom_range);
+
+  TBOX part_box = part->bounding_box();
+  // Start a rect search
+  ColPartitionGridSearch rectsearch(&clean_part_grid_);
+  rectsearch.StartRectSearch(box);
+  ColPartition *neighbor;
+  int min_space_above = kMaxVerticalSpacing;
+  int min_space_below = kMaxVerticalSpacing;
+  ColPartition *above_neighbor = nullptr;
+  ColPartition *below_neighbor = nullptr;
+  while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+    if (neighbor == part) {
+      continue;
+    }
+    TBOX neighbor_box = neighbor->bounding_box();
+    if (neighbor_box.major_x_overlap(part_box)) {
+      int gap = abs(part->median_bottom() - neighbor->median_bottom());
+      // If neighbor is below current partition
+      if (neighbor_box.top() < part_box.bottom() && gap < min_space_below) {
+        min_space_below = gap;
+        below_neighbor = neighbor;
+      } // If neighbor is above current partition
+      else if (part_box.top() < neighbor_box.bottom() &&
+               gap < min_space_above) {
+        min_space_above = gap;
+        above_neighbor = neighbor;
+      }
+    }
+  }
+  part->set_space_above(min_space_above);
+  part->set_space_below(min_space_below);
+  part->set_nearest_neighbor_above(above_neighbor);
+  part->set_nearest_neighbor_below(below_neighbor);
+}
+
+// Set global spacing and x-height estimates
+void TableFinder::SetGlobalSpacings(ColPartitionGrid *grid) {
+  STATS xheight_stats(0, kMaxVerticalSpacing);
+  STATS width_stats(0, kMaxBlobWidth);
+  STATS ledding_stats(0, kMaxVerticalSpacing);
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    // TODO(nbeato): HACK HACK HACK! medians are equal to partition length.
+    // ComputeLimits needs to get called somewhere outside of TableFinder
+    // to make sure the partitions are properly initialized.
+    // When this is called, SmoothPartitionPartners dies in an assert after
+    // table find runs. Alternative solution.
+    // part->ComputeLimits();
+    if (part->IsTextType()) {
+      // xheight_stats.add(part->median_height(), part->boxes_count());
+      // width_stats.add(part->median_width(), part->boxes_count());
+
+      // This loop can be removed when above issues are fixed.
+      // Replace it with the 2 lines commented out above.
+      BLOBNBOX_C_IT it(part->boxes());
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        xheight_stats.add(it.data()->bounding_box().height(), 1);
+        width_stats.add(it.data()->bounding_box().width(), 1);
+      }
+
+      ledding_stats.add(part->space_above(), 1);
+      ledding_stats.add(part->space_below(), 1);
+    }
+  }
+  // Set estimates based on median of statistics obtained
+  set_global_median_xheight(static_cast<int>(xheight_stats.median() + 0.5));
+  set_global_median_blob_width(static_cast<int>(width_stats.median() + 0.5));
+  set_global_median_ledding(static_cast<int>(ledding_stats.median() + 0.5));
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_stats) {
+    const char *kWindowName = "X-height (R), X-width (G), and ledding (B)";
+    ScrollView *stats_win = MakeWindow(500, 10, kWindowName);
+    xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED);
+    width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN);
+    ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE);
+  }
+#endif // !GRAPHICS_DISABLED
+}
+
+void TableFinder::set_global_median_xheight(int xheight) {
+  global_median_xheight_ = xheight;
+}
+void TableFinder::set_global_median_blob_width(int width) {
+  global_median_blob_width_ = width;
+}
+void TableFinder::set_global_median_ledding(int ledding) {
+  global_median_ledding_ = ledding;
+}
+
+void TableFinder::FindNeighbors() {
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    // TODO(nbeato): Rename this function, meaning is different now.
+    // IT is finding nearest neighbors its own way
+    // SetVerticalSpacing(part);
+
+    ColPartition *upper = part->SingletonPartner(true);
+    if (upper) {
+      part->set_nearest_neighbor_above(upper);
+    }
+
+    ColPartition *lower = part->SingletonPartner(false);
+    if (lower) {
+      part->set_nearest_neighbor_below(lower);
+    }
+  }
+}
+
+// High level interface. Input is an unmarked ColPartitionGrid
+// (namely, clean_part_grid_). Partitions are identified using local
+// information and filter/smoothed. The function exit should contain
+// a good sampling of the table partitions.
+void TableFinder::MarkTablePartitions() {
+  MarkPartitionsUsingLocalInformation();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    ScrollView *table_win = MakeWindow(300, 300, "Initial Table Partitions");
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+  }
+#endif
+  FilterFalseAlarms();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    ScrollView *table_win = MakeWindow(600, 300, "Filtered Table Partitions");
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+  }
+#endif
+  SmoothTablePartitionRuns();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    ScrollView *table_win = MakeWindow(900, 300, "Smoothed Table Partitions");
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+  }
+#endif
+  FilterFalseAlarms();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark || textord_show_tables) {
+    ScrollView *table_win = MakeWindow(900, 300, "Final Table Partitions");
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+  }
+#endif
+}
+
+// These types of partitions are marked as table partitions:
+//  1- Partitions that have at lease one large gap between words
+//  2- Partitions that consist of only one word (no significant gap
+//     between components)
+//  3- Partitions that vertically overlap with other partitions within the
+//     same column.
+//  4- Partitions with leaders before/after them.
+void TableFinder::MarkPartitionsUsingLocalInformation() {
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (!part->IsTextType()) { // Only consider text partitions
+      continue;
+    }
+    // Only consider partitions in dominant font size or smaller
+    if (part->median_height() > kMaxTableCellXheight * global_median_xheight_) {
+      continue;
+    }
+    // Mark partitions with a large gap, or no significant gap as
+    // table partitions.
+    // Comments: It produces several false alarms at:
+    //  - last line of a paragraph (fixed)
+    //  - single word section headings
+    //  - page headers and footers
+    //  - numbered equations
+    //  - line drawing regions
+    // TODO(faisal): detect and fix above-mentioned cases
+    if (HasWideOrNoInterWordGap(part) || HasLeaderAdjacent(*part)) {
+      part->set_table_type();
+    }
+  }
+}
+
+// Check if the partition has at least one large gap between words or no
+// significant gap at all
+bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
+  // Should only get text partitions.
+  ASSERT_HOST(part->IsTextType());
+  // Blob access
+  BLOBNBOX_CLIST *part_boxes = part->boxes();
+  BLOBNBOX_C_IT it(part_boxes);
+  // Check if this is a relatively small partition (such as a single word)
+  if (part->bounding_box().width() <
+          kMinBoxesInTextPartition * part->median_height() &&
+      part_boxes->length() < kMinBoxesInTextPartition) {
+    return true;
+  }
+
+  // Variables used to compute inter-blob spacing.
+  int previous_x1 = -1;
+  // Stores the maximum gap detected.
+  int largest_partition_gap_found = -1;
+  // Text partition gap limits. If this is text (and not a table),
+  // there should be at least one gap larger than min_gap and no gap
+  // larger than max_gap.
+  const double max_gap = kMaxGapInTextPartition * part->median_height();
+  const double min_gap = kMinMaxGapInTextPartition * part->median_height();
+
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX *blob = it.data();
+    int current_x0 = blob->bounding_box().left();
+    int current_x1 = blob->bounding_box().right();
+    if (previous_x1 != -1) {
+      int gap = current_x0 - previous_x1;
+
+      // TODO(nbeato): Boxes may overlap? Huh?
+      // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors
+      // on the top right of the page are filtered out with this line.
+      // Note 2: Iterating over blobs in a partition, so we are looking for
+      // spacing between the words.
+      if (gap < 0) {
+        // More likely case, the blobs slightly overlap. This can happen
+        // with diacritics (accents) or broken alphabet symbols (characters).
+        // Merge boxes together by taking max of right sides.
+        if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
+          previous_x1 = std::max(previous_x1, current_x1);
+          continue;
+        }
+        // Extreme case, blobs overlap significantly in the same partition...
+        // This should not happen often (if at all), but it does.
+        // TODO(nbeato): investigate cases when this happens.
+        else {
+          // The behavior before was to completely ignore this case.
+        }
+      }
+
+      // If a large enough gap is found, mark it as a table cell (return true)
+      if (gap > max_gap) {
+        return true;
+      }
+      if (gap > largest_partition_gap_found) {
+        largest_partition_gap_found = gap;
+      }
+    }
+    previous_x1 = current_x1;
+  }
+  // Since no large gap was found, return false if the partition is too
+  // long to be a data cell
+  if (part->bounding_box().width() >
+          kMaxBoxesInDataPartition * part->median_height() ||
+      part_boxes->length() > kMaxBoxesInDataPartition) {
+    return false;
+  }
+
+  // A partition may be a single blob. In this case, it's an isolated symbol
+  // or non-text (such as a ruling or image).
+  // Detect these as table partitions? Shouldn't this be case by case?
+  // The behavior before was to ignore this, making max_partition_gap < 0
+  // and implicitly return true. Just making it explicit.
+  if (largest_partition_gap_found == -1) {
+    return true;
+  }
+
+  // return true if the maximum gap found is smaller than the minimum allowed
+  // max_gap in a text partition. This indicates that there is no significant
+  // space in the partition, hence it is likely a single word.
+  return largest_partition_gap_found < min_gap;
+}
+
+// A criteria for possible tables is that a table may have leaders
+// between data cells. An aggressive solution to find such tables is to
+// explicitly mark partitions that have adjacent leaders.
+// Note that this includes overlapping leaders. However, it does not
+// include leaders in different columns on the page.
+// Possible false-positive will include lists, such as a table of contents.
+// As these arise, the aggressive nature of this search may need to be
+// trimmed down.
+bool TableFinder::HasLeaderAdjacent(const ColPartition &part) {
+  if (part.flow() == BTFT_LEADER) {
+    return true;
+  }
+  // Search range is left and right bounded by an offset of the
+  // median xheight. This offset is to allow some tolerance to the
+  // the leaders on the page in the event that the alignment is still
+  // a bit off.
+  const TBOX &box = part.bounding_box();
+  const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_;
+  const int top = box.top() + search_size;
+  const int bottom = box.bottom() - search_size;
+  ColPartitionGridSearch hsearch(&leader_and_ruling_grid_);
+  for (int direction = 0; direction < 2; ++direction) {
+    bool right_to_left = (direction == 0);
+    int x = right_to_left ? box.right() : box.left();
+    hsearch.StartSideSearch(x, bottom, top);
+    ColPartition *leader = nullptr;
+    while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) {
+      // The leader could be a horizontal ruling in the grid.
+      // Make sure it is actually a leader.
+      if (leader->flow() != BTFT_LEADER) {
+        continue;
+      }
+      // This should not happen, they are in different grids.
+      ASSERT_HOST(&part != leader);
+      // Make sure the leader shares a page column with the partition,
+      // otherwise we are spreading across columns.
+      if (!part.IsInSameColumnAs(*leader)) {
+        break;
+      }
+      // There should be a significant vertical overlap
+      if (!leader->VSignificantCoreOverlap(part)) {
+        continue;
+      }
+      // Leader passed all tests, so it is adjacent.
+      return true;
+    }
+  }
+  // No leaders are adjacent to the given partition.
+  return false;
+}
+
+// Filter individual text partitions marked as table partitions
+// consisting of paragraph endings, small section headings, and
+// headers and footers.
+void TableFinder::FilterFalseAlarms() {
+  FilterParagraphEndings();
+  FilterHeaderAndFooter();
+  // TODO(nbeato): Fully justified text as non-table?
+}
+
+void TableFinder::FilterParagraphEndings() {
+  // Detect last line of paragraph
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->type() != PT_TABLE) {
+      continue; // Consider only table partitions
+    }
+
+    // Paragraph ending should have flowing text above it.
+    ColPartition *upper_part = part->nearest_neighbor_above();
+    if (!upper_part) {
+      continue;
+    }
+    if (upper_part->type() != PT_FLOWING_TEXT) {
+      continue;
+    }
+    if (upper_part->bounding_box().width() < 2 * part->bounding_box().width()) {
+      continue;
+    }
+    // Check if its the last line of a paragraph.
+    // In most cases, a paragraph ending should be left-aligned to text line
+    // above it. Sometimes, it could be a 2 line paragraph, in which case
+    // the line above it is indented.
+    // To account for that, check if the partition center is to
+    // the left of the one above it.
+    int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2;
+    int upper_mid = (upper_part->bounding_box().left() +
+                     upper_part->bounding_box().right()) /
+                    2;
+    int current_spacing = 0; // spacing of the current line to margin
+    int upper_spacing = 0;   // spacing of the previous line to the margin
+    if (left_to_right_language_) {
+      // Left to right languages, use mid - left to figure out the distance
+      // the middle is from the left margin.
+      int left = std::min(part->bounding_box().left(),
+                          upper_part->bounding_box().left());
+      current_spacing = mid - left;
+      upper_spacing = upper_mid - left;
+    } else {
+      // Right to left languages, use right - mid to figure out the distance
+      // the middle is from the right margin.
+      int right = std::max(part->bounding_box().right(),
+                           upper_part->bounding_box().right());
+      current_spacing = right - mid;
+      upper_spacing = right - upper_mid;
+    }
+    if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing) {
+      continue;
+    }
+
+    // Paragraphs should have similar fonts.
+    if (!part->MatchingSizes(*upper_part) ||
+        !part->MatchingStrokeWidth(*upper_part, kStrokeWidthFractionalTolerance,
+                                   kStrokeWidthConstantTolerance)) {
+      continue;
+    }
+
+    // The last line of a paragraph should be left aligned.
+    // TODO(nbeato): This would be untrue if the text was right aligned.
+    // How often is that?
+    if (part->space_to_left() >
+        kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) {
+      continue;
+    }
+    // The line above it should be right aligned (assuming justified format).
+    // Since we can't assume justified text, we compare whitespace to text.
+    // The above line should have majority spanning text (or the current
+    // line could have fit on the previous line). So compare
+    // whitespace to text.
+    if (upper_part->bounding_box().width() <
+        kMinParagraphEndingTextToWhitespaceRatio *
+            upper_part->space_to_right()) {
+      continue;
+    }
+
+    // Ledding above the line should be less than ledding below
+    if (part->space_above() >= part->space_below() ||
+        part->space_above() > 2 * global_median_ledding_) {
+      continue;
+    }
+
+    // If all checks failed, it is probably text.
+    part->clear_table_type();
+  }
+}
+
+void TableFinder::FilterHeaderAndFooter() {
+  // Consider top-most text colpartition as header and bottom most as footer
+  ColPartition *header = nullptr;
+  ColPartition *footer = nullptr;
+  int max_top = INT32_MIN;
+  int min_bottom = INT32_MAX;
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (!part->IsTextType()) {
+      continue; // Consider only text partitions
+    }
+    int top = part->bounding_box().top();
+    int bottom = part->bounding_box().bottom();
+    if (top > max_top) {
+      max_top = top;
+      header = part;
+    }
+    if (bottom < min_bottom) {
+      min_bottom = bottom;
+      footer = part;
+    }
+  }
+  if (header) {
+    header->clear_table_type();
+  }
+  if (footer) {
+    footer->clear_table_type();
+  }
+}
+
+// Mark all ColPartitions as table cells that have a table cell above
+// and below them
+// TODO(faisal): This is too aggressive at the moment. The method needs to
+// consider spacing and alignment as well. Detection of false alarm table cells
+// should also be done as part of it.
+void TableFinder::SmoothTablePartitionRuns() {
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN) {
+      continue; // Consider only text partitions
+    }
+    ColPartition *upper_part = part->nearest_neighbor_above();
+    ColPartition *lower_part = part->nearest_neighbor_below();
+    if (!upper_part || !lower_part) {
+      continue;
+    }
+    if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE) {
+      part->set_table_type();
+    }
+  }
+
+  // Pass 2, do the opposite. If both the upper and lower neighbors
+  // exist and are not tables, this probably shouldn't be a table.
+  gsearch.StartFullSearch();
+  part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->type() != PT_TABLE) {
+      continue; // Consider only text partitions
+    }
+    ColPartition *upper_part = part->nearest_neighbor_above();
+    ColPartition *lower_part = part->nearest_neighbor_below();
+
+    // table can't be by itself
+    if ((upper_part && upper_part->type() != PT_TABLE) &&
+        (lower_part && lower_part->type() != PT_TABLE)) {
+      part->clear_table_type();
+    }
+  }
+}
+
+// Set the type of a column segment based on the ratio of table to text cells
+void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) {
+  ColSegment_IT it(column_blocks);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColSegment *seg = it.data();
+    TBOX box = seg->bounding_box();
+    int num_table_cells = 0;
+    int num_text_cells = 0;
+    ColPartitionGridSearch rsearch(&clean_part_grid_);
+    rsearch.SetUniqueMode(true);
+    rsearch.StartRectSearch(box);
+    ColPartition *part = nullptr;
+    while ((part = rsearch.NextRectSearch()) != nullptr) {
+      if (part->type() == PT_TABLE) {
+        num_table_cells++;
+      } else if (part->type() == PT_FLOWING_TEXT) {
+        num_text_cells++;
+      }
+    }
+    // If a column block has no text or table partition in it, it is not needed
+    // for table detection.
+    if (!num_table_cells && !num_text_cells) {
+      delete it.extract();
+    } else {
+      seg->set_num_table_cells(num_table_cells);
+      seg->set_num_text_cells(num_text_cells);
+      // set column type based on the ratio of table to text cells
+      seg->set_type();
+    }
+  }
+}
+
+// Move column blocks to grid
+void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments,
+                                        ColSegmentGrid *col_seg_grid) {
+  ColSegment_IT it(segments);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColSegment *seg = it.extract();
+    col_seg_grid->InsertBBox(true, true, seg);
+  }
+}
+
+// Merge column blocks if a split is detected due to the presence of a
+// table. A text block is considered split if it has multiple
+// neighboring blocks above/below it, and at least one of the
+// neighboring blocks is of table type (has a high density of table
+// partitions). In this case neighboring blocks in the direction
+// (above/below) of the table block are merged with the text block.
+
+// Comment: This method does not handle split due to a full page table
+// since table columns in this case do not have a text column on which
+// split decision can be based.
+void TableFinder::GridMergeColumnBlocks() {
+  int margin = gridsize();
+
+  // Iterate the Column Blocks in the grid.
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
+      &col_seg_grid_);
+  gsearch.StartFullSearch();
+  ColSegment *seg;
+  while ((seg = gsearch.NextFullSearch()) != nullptr) {
+    if (seg->type() != COL_TEXT) {
+      continue; // only consider text blocks for split detection
+    }
+    bool neighbor_found = false;
+    bool modified = false; // Modified at least once
+    // keep expanding current box as long as neighboring table columns
+    // are found above or below it.
+    do {
+      TBOX box = seg->bounding_box();
+      // slightly expand the search region vertically
+      int top_range =
+          std::min(box.top() + margin, static_cast<int>(tright().y()));
+      int bottom_range =
+          std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
+      box.set_top(top_range);
+      box.set_bottom(bottom_range);
+      neighbor_found = false;
+      GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(
+          &col_seg_grid_);
+      rectsearch.StartRectSearch(box);
+      ColSegment *neighbor = nullptr;
+      while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+        if (neighbor == seg) {
+          continue;
+        }
+        const TBOX &neighbor_box = neighbor->bounding_box();
+        // If the neighbor box significantly overlaps with the current
+        // box (due to the expansion of the current box in the
+        // previous iteration of this loop), remove the neighbor box
+        // and expand the current box to include it.
+        if (neighbor_box.overlap_fraction(box) >= 0.9) {
+          seg->InsertBox(neighbor_box);
+          modified = true;
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;
+          continue;
+        }
+        // Only expand if the neighbor box is of table type
+        if (neighbor->type() != COL_TABLE) {
+          continue;
+        }
+        // Insert the neighbor box into the current column block
+        if (neighbor_box.major_x_overlap(box) && !box.contains(neighbor_box)) {
+          seg->InsertBox(neighbor_box);
+          neighbor_found = true;
+          modified = true;
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;
+        }
+      }
+    } while (neighbor_found);
+    if (modified) {
+      // Because the box has changed, it has to be removed first.
+      gsearch.RemoveBBox();
+      col_seg_grid_.InsertBBox(true, true, seg);
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// Group horizontally overlapping table partitions into table columns.
+// TODO(faisal): This is too aggressive at the moment. The method should
+// consider more attributes to group table partitions together. Some common
+// errors are:
+//  1- page number is merged with a table column above it even
+//      if there is a large vertical gap between them.
+//  2- column headers go on to catch one of the columns arbitrarily
+//  3- an isolated noise blob near page top or bottom merges with the table
+//     column below/above it
+//  4- cells from two vertically adjacent tables merge together to make a
+//     single column resulting in merging of the two tables
+void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
+  ColSegment_IT it(table_columns);
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition *part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->inside_table_column() || part->type() != PT_TABLE) {
+      continue; // prevent a partition to be assigned to multiple columns
+    }
+    const TBOX &box = part->bounding_box();
+    auto *col = new ColSegment();
+    col->InsertBox(box);
+    part->set_inside_table_column(true);
+    // Start a search below the current cell to find bottom neighbours
+    // Note: a full search will always process things above it first, so
+    // this should be starting at the highest cell and working its way down.
+    ColPartitionGridSearch vsearch(&clean_part_grid_);
+    vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom());
+    ColPartition *neighbor = nullptr;
+    bool found_neighbours = false;
+    while ((neighbor = vsearch.NextVerticalSearch(true)) != nullptr) {
+      // only consider neighbors not assigned to any column yet
+      if (neighbor->inside_table_column()) {
+        continue;
+      }
+      // Horizontal lines should not break the flow
+      if (neighbor->IsHorizontalLine()) {
+        continue;
+      }
+      // presence of a non-table neighbor marks the end of current
+      // table column
+      if (neighbor->type() != PT_TABLE) {
+        break;
+      }
+      // add the neighbor partition to the table column
+      const TBOX &neighbor_box = neighbor->bounding_box();
+      col->InsertBox(neighbor_box);
+      neighbor->set_inside_table_column(true);
+      found_neighbours = true;
+    }
+    if (found_neighbours) {
+      it.add_after_then_move(col);
+    } else {
+      part->set_inside_table_column(false);
+      delete col;
+    }
+  }
+}
+
+// Mark regions in a column that are x-bounded by the column boundaries and
+// y-bounded by the table columns' projection on the y-axis as table regions
+void TableFinder::GetTableRegions(ColSegment_LIST *table_columns,
+                                  ColSegment_LIST *table_regions) {
+  ColSegment_IT cit(table_columns);
+  ColSegment_IT rit(table_regions);
+  // Iterate through column blocks
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
+      &col_seg_grid_);
+  gsearch.StartFullSearch();
+  ColSegment *part;
+  int page_height = tright().y() - bleft().y();
+  ASSERT_HOST(page_height > 0);
+  // create a bool array to hold projection on y-axis
+  bool *table_region = new bool[page_height];
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    const TBOX &part_box = part->bounding_box();
+    // reset the projection array
+    for (int i = 0; i < page_height; i++) {
+      table_region[i] = false;
+    }
+    // iterate through all table columns to find regions in the current
+    // page column block
+    cit.move_to_first();
+    for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) {
+      TBOX col_box = cit.data()->bounding_box();
+      // find intersection region of table column and page column
+      TBOX intersection_box = col_box.intersection(part_box);
+      // project table column on the y-axis
+      for (int i = intersection_box.bottom(); i < intersection_box.top(); i++) {
+        table_region[i - bleft().y()] = true;
+      }
+    }
+    // set x-limits of table regions to page column width
+    TBOX current_table_box;
+    current_table_box.set_left(part_box.left());
+    current_table_box.set_right(part_box.right());
+    // go through the y-axis projection to find runs of table
+    // regions. Each run makes one table region.
+    for (int i = 1; i < page_height; i++) {
+      // detect start of a table region
+      if (!table_region[i - 1] && table_region[i]) {
+        current_table_box.set_bottom(i + bleft().y());
+      }
+      // TODO(nbeato): Is it guaranteed that the last row is not a table region?
+      // detect end of a table region
+      if (table_region[i - 1] && !table_region[i]) {
+        current_table_box.set_top(i + bleft().y());
+        if (!current_table_box.null_box()) {
+          auto *seg = new ColSegment();
+          seg->InsertBox(current_table_box);
+          rit.add_after_then_move(seg);
+        }
+      }
+    }
+  }
+  delete[] table_region;
+}
+
+// Merge table regions corresponding to tables spanning multiple columns if
+// there is a colpartition (horizontal ruling line or normal text) that
+// touches both regions.
+// TODO(faisal): A rare error occurs if there are two horizontally adjacent
+// tables with aligned ruling lines. In this case, line finder returns a
+// single line and hence the tables get merged together
+void TableFinder::GridMergeTableRegions() {
+  // Iterate the table regions in the grid.
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
+      &table_grid_);
+  gsearch.StartFullSearch();
+  ColSegment *seg = nullptr;
+  while ((seg = gsearch.NextFullSearch()) != nullptr) {
+    bool neighbor_found = false;
+    bool modified = false; // Modified at least once
+    do {
+      // Start a rectangle search x-bounded by the image and y by the table
+      const TBOX &box = seg->bounding_box();
+      TBOX search_region(box);
+      search_region.set_left(bleft().x());
+      search_region.set_right(tright().x());
+      neighbor_found = false;
+      GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(
+          &table_grid_);
+      rectsearch.StartRectSearch(search_region);
+      ColSegment *neighbor = nullptr;
+      while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+        if (neighbor == seg) {
+          continue;
+        }
+        const TBOX &neighbor_box = neighbor->bounding_box();
+        // Check if a neighbor box has a large overlap with the table
+        // region.  This may happen as a result of merging two table
+        // regions in the previous iteration.
+        if (neighbor_box.overlap_fraction(box) >= 0.9) {
+          seg->InsertBox(neighbor_box);
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;
+          modified = true;
+          continue;
+        }
+        // Check if two table regions belong together based on a common
+        // horizontal ruling line
+        if (BelongToOneTable(box, neighbor_box)) {
+          seg->InsertBox(neighbor_box);
+          neighbor_found = true;
+          modified = true;
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;
+        }
+      }
+    } while (neighbor_found);
+    if (modified) {
+      // Because the box has changed, it has to be removed first.
+      gsearch.RemoveBBox();
+      table_grid_.InsertBBox(true, true, seg);
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// Decide if two table regions belong to one table based on a common
+// horizontal ruling line or another colpartition
+bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) {
+  // Check the obvious case. Most likely not true because overlapping boxes
+  // should already be merged, but seems like a good thing to do in case things
+  // change.
+  if (box1.overlap(box2)) {
+    return true;
+  }
+  // Check for ColPartitions spanning both table regions
+  TBOX bbox = box1.bounding_union(box2);
+  // Start a rect search on bbox
+  ColPartitionGridSearch rectsearch(&clean_part_grid_);
+  rectsearch.StartRectSearch(bbox);
+  ColPartition *part = nullptr;
+  while ((part = rectsearch.NextRectSearch()) != nullptr) {
+    const TBOX &part_box = part->bounding_box();
+    // return true if a colpartition spanning both table regions is found
+    if (part_box.overlap(box1) && part_box.overlap(box2) &&
+        !part->IsImageType()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Adjust table boundaries by:
+//  - building a tight bounding box around all ColPartitions contained in it.
+//  - expanding table boundaries to include all colpartitions that overlap the
+//    table by more than half of their area
+//  - expanding table boundaries to include nearby horizontal rule lines
+//  - expanding table vertically to include left out column headers
+// TODO(faisal): Expansion of table boundaries is quite aggressive. It usually
+//               makes following errors:
+//  1- horizontal lines consisting of underlines are included in the table if
+//     they are close enough
+//  2- horizontal lines originating from noise tend to get merged with a table
+//     near the top of the page
+//  3- the criteria for including horizontal lines is very generous. Many times
+//     horizontal lines separating headers and footers get merged with a
+//     single-column table in a multi-column page thereby including text
+//     from the neighboring column inside the table
+//  4- the criteria for including left out column headers also tends to
+//     occasionally include text-lines above the tables, typically from
+//     table caption
+void TableFinder::AdjustTableBoundaries() {
+  // Iterate the table regions in the grid
+  ColSegment_CLIST adjusted_tables;
+  ColSegment_C_IT it(&adjusted_tables);
+  ColSegmentGridSearch gsearch(&table_grid_);
+  gsearch.StartFullSearch();
+  ColSegment *table = nullptr;
+  while ((table = gsearch.NextFullSearch()) != nullptr) {
+    const TBOX &table_box = table->bounding_box();
+    TBOX grown_box = table_box;
+    GrowTableBox(table_box, &grown_box);
+    // To prevent a table from expanding again, do not insert the
+    // modified box back to the grid. Instead move it to a list and
+    // and remove it from the grid. The list is moved later back to the grid.
+    if (!grown_box.null_box()) {
+      auto *col = new ColSegment();
+      col->InsertBox(grown_box);
+      it.add_after_then_move(col);
+    }
+    gsearch.RemoveBBox();
+    delete table;
+  }
+  // clear table grid to move final tables in it
+  // TODO(nbeato): table_grid_ should already be empty. The above loop
+  // removed everything. Maybe just assert it is empty?
+  table_grid_.Clear();
+  it.move_to_first();
+  // move back final tables to table_grid_
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColSegment *seg = it.extract();
+    table_grid_.InsertBBox(true, true, seg);
+  }
+}
+
+void TableFinder::GrowTableBox(const TBOX &table_box, TBOX *result_box) {
+  // TODO(nbeato): The growing code is a bit excessive right now.
+  // By removing these lines, the partitions considered need
+  // to have some overlap or be special cases. These lines could
+  // be added again once a check is put in place to make sure that
+  // growing tables don't stomp on a lot of non-table partitions.
+
+  // search for horizontal ruling lines within the vertical margin
+  // int vertical_margin = kRulingVerticalMargin * gridsize();
+  TBOX search_box = table_box;
+  // int top = MIN(search_box.top() + vertical_margin, tright().y());
+  // int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y());
+  // search_box.set_top(top);
+  // search_box.set_bottom(bottom);
+
+  GrowTableToIncludePartials(table_box, search_box, result_box);
+  GrowTableToIncludeLines(table_box, search_box, result_box);
+  IncludeLeftOutColumnHeaders(result_box);
+}
+
+// Grow a table by increasing the size of the box to include
+// partitions with significant overlap with the table.
+void TableFinder::GrowTableToIncludePartials(const TBOX &table_box,
+                                             const TBOX &search_range,
+                                             TBOX *result_box) {
+  // Rulings are in a different grid, so search 2 grids for rulings, text,
+  // and table partitions that are not entirely within the new box.
+  for (int i = 0; i < 2; ++i) {
+    ColPartitionGrid *grid =
+        (i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_;
+    ColPartitionGridSearch rectsearch(grid);
+    rectsearch.StartRectSearch(search_range);
+    ColPartition *part = nullptr;
+    while ((part = rectsearch.NextRectSearch()) != nullptr) {
+      // Only include text and table types.
+      if (part->IsImageType()) {
+        continue;
+      }
+      const TBOX &part_box = part->bounding_box();
+      // Include partition in the table if more than half of it
+      // is covered by the table
+      if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+        *result_box = result_box->bounding_union(part_box);
+        continue;
+      }
+    }
+  }
+}
+
+// Grow a table by expanding to the extents of significantly
+// overlapping lines.
+void TableFinder::GrowTableToIncludeLines(const TBOX &table_box,
+                                          const TBOX &search_range,
+                                          TBOX *result_box) {
+  ColPartitionGridSearch rsearch(&leader_and_ruling_grid_);
+  rsearch.SetUniqueMode(true);
+  rsearch.StartRectSearch(search_range);
+  ColPartition *part = nullptr;
+  while ((part = rsearch.NextRectSearch()) != nullptr) {
+    // TODO(nbeato) This should also do vertical, but column
+    // boundaries are breaking things. This function needs to be
+    // updated to allow vertical lines as well.
+    if (!part->IsLineType()) {
+      continue;
+    }
+    // Avoid the following function call if the result of the
+    // function is irrelevant.
+    const TBOX &part_box = part->bounding_box();
+    if (result_box->contains(part_box)) {
+      continue;
+    }
+    // Include a partially overlapping horizontal line only if the
+    // extra ColPartitions that will be included due to expansion
+    // have large side spacing w.r.t. columns containing them.
+    if (HLineBelongsToTable(*part, table_box)) {
+      *result_box = result_box->bounding_union(part_box);
+    }
+    // TODO(nbeato): Vertical
+  }
+}
+
+// Checks whether the horizontal line belong to the table by looking at the
+// side spacing of extra ColPartitions that will be included in the table
+// due to expansion
+bool TableFinder::HLineBelongsToTable(const ColPartition &part,
+                                      const TBOX &table_box) {
+  if (!part.IsHorizontalLine()) {
+    return false;
+  }
+  const TBOX &part_box = part.bounding_box();
+  if (!part_box.major_x_overlap(table_box)) {
+    return false;
+  }
+  // Do not consider top-most horizontal line since it usually
+  // originates from noise.
+  // TODO(nbeato): I had to comment this out because the ruling grid doesn't
+  // have neighbors solved.
+  // if (!part.nearest_neighbor_above())
+  //   return false;
+  const TBOX bbox = part_box.bounding_union(table_box);
+  // In the "unioned table" box (the table extents expanded by the line),
+  // keep track of how many partitions have significant padding to the left
+  // and right. If more than half of the partitions covered by the new table
+  // have significant spacing, the line belongs to the table and the table
+  // grows to include all of the partitions.
+  int num_extra_partitions = 0;
+  int extra_space_to_right = 0;
+  int extra_space_to_left = 0;
+  // Rulings are in a different grid, so search 2 grids for rulings, text,
+  // and table partitions that are introduced by the new box.
+  for (int i = 0; i < 2; ++i) {
+    ColPartitionGrid *grid =
+        (i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_;
+    // Start a rect search on bbox
+    ColPartitionGridSearch rectsearch(grid);
+    rectsearch.SetUniqueMode(true);
+    rectsearch.StartRectSearch(bbox);
+    ColPartition *extra_part = nullptr;
+    while ((extra_part = rectsearch.NextRectSearch()) != nullptr) {
+      // ColPartition already in table
+      const TBOX &extra_part_box = extra_part->bounding_box();
+      if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+        continue;
+      }
+      // Non-text ColPartitions do not contribute
+      if (extra_part->IsImageType()) {
+        continue;
+      }
+      // Consider this partition.
+      num_extra_partitions++;
+      // presence of a table cell is a strong hint, so just increment the scores
+      // without looking at the spacing.
+      if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) {
+        extra_space_to_right++;
+        extra_space_to_left++;
+        continue;
+      }
+      int space_threshold = kSideSpaceMargin * part.median_height();
+      if (extra_part->space_to_right() > space_threshold) {
+        extra_space_to_right++;
+      }
+      if (extra_part->space_to_left() > space_threshold) {
+        extra_space_to_left++;
+      }
+    }
+  }
+  // tprintf("%d %d %d\n",
+  // num_extra_partitions,extra_space_to_right,extra_space_to_left);
+  return (extra_space_to_right > num_extra_partitions / 2) ||
+         (extra_space_to_left > num_extra_partitions / 2);
+}
+
+// Look for isolated column headers above the given table box and
+// include them in the table
+void TableFinder::IncludeLeftOutColumnHeaders(TBOX *table_box) {
+  // Start a search above the current table to look for column headers
+  ColPartitionGridSearch vsearch(&clean_part_grid_);
+  vsearch.StartVerticalSearch(table_box->left(), table_box->right(),
+                              table_box->top());
+  ColPartition *neighbor = nullptr;
+  ColPartition *previous_neighbor = nullptr;
+  while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
+    // Max distance to find a table heading.
+    const int max_distance =
+        kMaxColumnHeaderDistance * neighbor->median_height();
+    int table_top = table_box->top();
+    const TBOX &box = neighbor->bounding_box();
+    // Do not continue if the next box is way above
+    if (box.bottom() - table_top > max_distance) {
+      break;
+    }
+    // Unconditionally include partitions of type TABLE or LINE
+    // TODO(faisal): add some reasonable conditions here
+    if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) {
+      table_box->set_top(box.top());
+      previous_neighbor = nullptr;
+      continue;
+    }
+    // If there are two text partitions, one above the other, without a table
+    // cell on their left or right side, consider them a barrier and quit
+    if (previous_neighbor == nullptr) {
+      previous_neighbor = neighbor;
+    } else {
+      const TBOX &previous_box = previous_neighbor->bounding_box();
+      if (!box.major_y_overlap(previous_box)) {
+        break;
+      }
+    }
+  }
+}
+
+// Remove false alarms consisting of a single column based on their
+// projection on the x-axis. Projection of a real table on the x-axis
+// should have at least one zero-valley larger than the global median
+// x-height of the page.
+void TableFinder::DeleteSingleColumnTables() {
+  int page_width = tright().x() - bleft().x();
+  ASSERT_HOST(page_width > 0);
+  // create an integer array to hold projection on x-axis
+  int *table_xprojection = new int[page_width];
+  // Iterate through all tables in the table grid
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(
+      &table_grid_);
+  table_search.StartFullSearch();
+  ColSegment *table;
+  while ((table = table_search.NextFullSearch()) != nullptr) {
+    TBOX table_box = table->bounding_box();
+    // reset the projection array
+    for (int i = 0; i < page_width; i++) {
+      table_xprojection[i] = 0;
+    }
+    // Start a rect search on table_box
+    ColPartitionGridSearch rectsearch(&clean_part_grid_);
+    rectsearch.SetUniqueMode(true);
+    rectsearch.StartRectSearch(table_box);
+    ColPartition *part;
+    while ((part = rectsearch.NextRectSearch()) != nullptr) {
+      if (!part->IsTextType()) {
+        continue; // Do not consider non-text partitions
+      }
+      if (part->flow() == BTFT_LEADER) {
+        continue; // Assume leaders are in tables
+      }
+      TBOX part_box = part->bounding_box();
+      // Do not consider partitions partially covered by the table
+      if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable) {
+        continue;
+      }
+      BLOBNBOX_CLIST *part_boxes = part->boxes();
+      BLOBNBOX_C_IT pit(part_boxes);
+
+      // Make sure overlapping blobs don't artificially inflate the number
+      // of rows in the table. This happens frequently with things such as
+      // decimals and split characters. Do this by assuming the column
+      // partition is sorted mostly left to right and just clip
+      // bounding boxes by the previous box's extent.
+      int next_position_to_write = 0;
+
+      for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
+        BLOBNBOX *pblob = pit.data();
+        // ignore blob height for the purpose of projection since we
+        // are only interested in finding valleys
+        int xstart = pblob->bounding_box().left();
+        int xend = pblob->bounding_box().right();
+
+        xstart = std::max(xstart, next_position_to_write);
+        for (int i = xstart; i < xend; i++) {
+          table_xprojection[i - bleft().x()]++;
+        }
+        next_position_to_write = xend;
+      }
+    }
+    // Find largest valley between two reasonable peaks in the table
+    if (!GapInXProjection(table_xprojection, page_width)) {
+      table_search.RemoveBBox();
+      delete table;
+    }
+  }
+  delete[] table_xprojection;
+}
+
+// Return true if at least one gap larger than the global x-height
+// exists in the horizontal projection
+bool TableFinder::GapInXProjection(int *xprojection, int length) {
+  // Find peak value of the histogram
+  int peak_value = 0;
+  for (int i = 0; i < length; i++) {
+    if (xprojection[i] > peak_value) {
+      peak_value = xprojection[i];
+    }
+  }
+  // Peak value represents the maximum number of horizontally
+  // overlapping colpartitions, so this can be considered as the
+  // number of rows in the table
+  if (peak_value < kMinRowsInTable) {
+    return false;
+  }
+  double projection_threshold = kSmallTableProjectionThreshold * peak_value;
+  if (peak_value >= kLargeTableRowCount) {
+    projection_threshold = kLargeTableProjectionThreshold * peak_value;
+  }
+  // Threshold the histogram
+  for (int i = 0; i < length; i++) {
+    xprojection[i] = (xprojection[i] >= projection_threshold) ? 1 : 0;
+  }
+  // Find the largest run of zeros between two ones
+  int largest_gap = 0;
+  int run_start = -1;
+  for (int i = 1; i < length; i++) {
+    // detect start of a run of zeros
+    if (xprojection[i - 1] && !xprojection[i]) {
+      run_start = i;
+    }
+    // detect end of a run of zeros and update the value of largest gap
+    if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) {
+      int gap = i - run_start;
+      if (gap > largest_gap) {
+        largest_gap = gap;
+      }
+      run_start = -1;
+    }
+  }
+  return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
+}
+
+// Given the location of a table "guess", try to overlay a cellular
+// grid in the location, adjusting the boundaries.
+// TODO(nbeato): Falsely introduces:
+//   -headers/footers (not any worse, too much overlap destroys cells)
+//   -page numbers (not worse, included because maximize margins)
+//   -equations (nicely fit into a celluar grid, but more sparsely)
+//   -figures (random text box, also sparse)
+//   -small left-aligned text areas with overlapping positioned whitespace
+//       (rejected before)
+// Overall, this just needs some more work.
+void TableFinder::RecognizeTables() {
+#ifndef GRAPHICS_DISABLED
+  ScrollView *table_win = nullptr;
+  if (textord_show_tables) {
+    table_win = MakeWindow(0, 0, "Table Structure");
+    DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE,
+                         ScrollView::LIGHT_BLUE);
+    // table_grid_.DisplayBoxes(table_win);
+  }
+#endif
+
+  TableRecognizer recognizer;
+  recognizer.Init();
+  recognizer.set_line_grid(&leader_and_ruling_grid_);
+  recognizer.set_text_grid(&fragmented_text_grid_);
+  recognizer.set_max_text_height(global_median_xheight_ * 2.0);
+  recognizer.set_min_height(1.5 * gridheight());
+  // Loop over all of the tables and try to fit them.
+  // Store the good tables here.
+  ColSegment_CLIST good_tables;
+  ColSegment_C_IT good_it(&good_tables);
+
+  ColSegmentGridSearch gsearch(&table_grid_);
+  gsearch.StartFullSearch();
+  ColSegment *found_table = nullptr;
+  while ((found_table = gsearch.NextFullSearch()) != nullptr) {
+    gsearch.RemoveBBox();
+
+    // The goal is to make the tables persistent in a list.
+    // When that happens, this will move into the search loop.
+    const TBOX &found_box = found_table->bounding_box();
+    StructuredTable *table_structure = recognizer.RecognizeTable(found_box);
+
+    // Process a table. Good tables are inserted into the grid again later on
+    // We can't change boxes in the grid while it is running a search.
+    if (table_structure != nullptr) {
+#ifndef GRAPHICS_DISABLED
+      if (textord_show_tables) {
+        table_structure->Display(table_win, ScrollView::LIME_GREEN);
+      }
+#endif
+      found_table->set_bounding_box(table_structure->bounding_box());
+      delete table_structure;
+      good_it.add_after_then_move(found_table);
+    } else {
+      delete found_table;
+    }
+  }
+  // TODO(nbeato): MERGE!! There is awesome info now available for merging.
+
+  // At this point, the grid is empty. We can safely insert the good tables
+  // back into grid.
+  for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward()) {
+    table_grid_.InsertBBox(true, true, good_it.extract());
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Displays the column segments in some window.
+void TableFinder::DisplayColSegments(ScrollView *win, ColSegment_LIST *segments,
+                                     ScrollView::Color color) {
+  win->Pen(color);
+  win->Brush(ScrollView::NONE);
+  ColSegment_IT it(segments);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColSegment *col = it.data();
+    const TBOX &box = col->bounding_box();
+    int left_x = box.left();
+    int right_x = box.right();
+    int top_y = box.top();
+    int bottom_y = box.bottom();
+    win->Rectangle(left_x, bottom_y, right_x, top_y);
+  }
+  win->UpdateWindow();
+}
+
+// Displays the colpartitions using a new coloring on an existing window.
+// Note: This method is only for debug purpose during development and
+// would not be part of checked in code
+void TableFinder::DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
+                                       ScrollView::Color default_color,
+                                       ScrollView::Color table_color) {
+  ScrollView::Color color = default_color;
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    color = default_color;
+    if (part->type() == PT_TABLE) {
+      color = table_color;
+    }
+
+    const TBOX &box = part->bounding_box();
+    int left_x = box.left();
+    int right_x = box.right();
+    int top_y = box.top();
+    int bottom_y = box.bottom();
+    win->Brush(ScrollView::NONE);
+    win->Pen(color);
+    win->Rectangle(left_x, bottom_y, right_x, top_y);
+  }
+  win->UpdateWindow();
+}
+
+void TableFinder::DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
+                                       ScrollView::Color default_color) {
+  DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
+}
+
+void TableFinder::DisplayColPartitionConnections(ScrollView *win,
+                                                 ColPartitionGrid *grid,
+                                                 ScrollView::Color color) {
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    const TBOX &box = part->bounding_box();
+    int left_x = box.left();
+    int right_x = box.right();
+    int top_y = box.top();
+    int bottom_y = box.bottom();
+
+    ColPartition *upper_part = part->nearest_neighbor_above();
+    if (upper_part) {
+      const TBOX &upper_box = upper_part->bounding_box();
+      int mid_x = (left_x + right_x) / 2;
+      int mid_y = (top_y + bottom_y) / 2;
+      int other_x = (upper_box.left() + upper_box.right()) / 2;
+      int other_y = (upper_box.top() + upper_box.bottom()) / 2;
+      win->Brush(ScrollView::NONE);
+      win->Pen(color);
+      win->Line(mid_x, mid_y, other_x, other_y);
+    }
+    ColPartition *lower_part = part->nearest_neighbor_below();
+    if (lower_part) {
+      const TBOX &lower_box = lower_part->bounding_box();
+      int mid_x = (left_x + right_x) / 2;
+      int mid_y = (top_y + bottom_y) / 2;
+      int other_x = (lower_box.left() + lower_box.right()) / 2;
+      int other_y = (lower_box.top() + lower_box.bottom()) / 2;
+      win->Brush(ScrollView::NONE);
+      win->Pen(color);
+      win->Line(mid_x, mid_y, other_x, other_y);
+    }
+  }
+  win->UpdateWindow();
+}
+
+#endif
+
+// Merge all colpartitions in table regions to make them a single
+// colpartition and revert types of isolated table cells not
+// assigned to any table to their original types.
+void TableFinder::MakeTableBlocks(ColPartitionGrid *grid,
+                                  ColPartitionSet **all_columns,
+                                  const WidthCallback &width_cb) {
+  // Since we have table blocks already, remove table tags from all
+  // colpartitions
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.StartFullSearch();
+  ColPartition *part = nullptr;
+
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->type() == PT_TABLE) {
+      part->clear_table_type();
+    }
+  }
+  // Now make a single colpartition out of each table block and remove
+  // all colpartitions contained within a table
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(
+      &table_grid_);
+  table_search.StartFullSearch();
+  ColSegment *table;
+  while ((table = table_search.NextFullSearch()) != nullptr) {
+    const TBOX &table_box = table->bounding_box();
+    // Start a rect search on table_box
+    ColPartitionGridSearch rectsearch(grid);
+    rectsearch.StartRectSearch(table_box);
+    ColPartition *part;
+    ColPartition *table_partition = nullptr;
+    while ((part = rectsearch.NextRectSearch()) != nullptr) {
+      // Do not consider image partitions
+      if (!part->IsTextType()) {
+        continue;
+      }
+      TBOX part_box = part->bounding_box();
+      // Include partition in the table if more than half of it
+      // is covered by the table
+      if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+        rectsearch.RemoveBBox();
+        if (table_partition) {
+          table_partition->Absorb(part, width_cb);
+        } else {
+          table_partition = part;
+        }
+      }
+    }
+    // Insert table colpartition back to part_grid_
+    if (table_partition) {
+      // To match the columns used when transforming to blocks, the new table
+      // partition must have its first and last column set at the grid y that
+      // corresponds to its bottom.
+      const TBOX &table_box = table_partition->bounding_box();
+      int grid_x, grid_y;
+      grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y);
+      table_partition->SetPartitionType(resolution_, all_columns[grid_y]);
+      table_partition->set_table_type();
+      table_partition->set_blob_type(BRT_TEXT);
+      table_partition->set_flow(BTFT_CHAIN);
+      table_partition->SetBlobTypes();
+      grid->InsertBBox(true, true, table_partition);
+    }
+  }
+}
+
+//////// ColSegment code
+////////
+ColSegment::ColSegment()
+    : ELIST_LINK(),
+      num_table_cells_(0),
+      num_text_cells_(0),
+      type_(COL_UNKNOWN) {}
+
+// Provides a color for BBGrid to draw the rectangle.
+ScrollView::Color ColSegment::BoxColor() const {
+  const ScrollView::Color kBoxColors[PT_COUNT] = {
+      ScrollView::YELLOW,
+      ScrollView::BLUE,
+      ScrollView::YELLOW,
+      ScrollView::MAGENTA,
+  };
+  return kBoxColors[type_];
+}
+
+// Insert a box into this column segment
+void ColSegment::InsertBox(const TBOX &other) {
+  bounding_box_ = bounding_box_.bounding_union(other);
+}
+
+// Set column segment type based on the ratio of text and table partitions
+// in it.
+void ColSegment::set_type() {
+  if (num_table_cells_ > kTableColumnThreshold * num_text_cells_) {
+    type_ = COL_TABLE;
+  } else if (num_text_cells_ > num_table_cells_) {
+    type_ = COL_TEXT;
+  } else {
+    type_ = COL_MIXED;
+  }
+}
+
+} // namespace tesseract.