diff mupdf-source/thirdparty/tesseract/src/classify/shapetable.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/shapetable.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,749 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+///////////////////////////////////////////////////////////////////////
+// File:        shapetable.cpp
+// Description: Class to map a classifier shape index to unicharset
+//              indices and font indices.
+// Author:      Ray Smith
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "shapetable.h"
+
+#include "bitvector.h"
+#include "fontinfo.h"
+#include "intfeaturespace.h"
+#include "unicharset.h"
+#include "unicity_table.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Helper function to get the index of the first result with the required
+// unichar_id. If the results are sorted by rating, this will also be the
+// best result with the required unichar_id.
+// Returns -1 if the unichar_id is not found
+int ShapeRating::FirstResultWithUnichar(const std::vector<ShapeRating> &results,
+                                        const ShapeTable &shape_table, UNICHAR_ID unichar_id) {
+  for (unsigned r = 0; r < results.size(); ++r) {
+    const auto shape_id = results[r].shape_id;
+    const Shape &shape = shape_table.GetShape(shape_id);
+    if (shape.ContainsUnichar(unichar_id)) {
+      return r;
+    }
+  }
+  return -1;
+}
+
+// Helper function to get the index of the first result with the required
+// unichar_id. If the results are sorted by rating, this will also be the
+// best result with the required unichar_id.
+// Returns -1 if the unichar_id is not found
+int UnicharRating::FirstResultWithUnichar(const std::vector<UnicharRating> &results,
+                                          UNICHAR_ID unichar_id) {
+  for (unsigned r = 0; r < results.size(); ++r) {
+    if (results[r].unichar_id == unichar_id) {
+      return r;
+    }
+  }
+  return -1;
+}
+
+// Writes to the given file. Returns false in case of error.
+bool UnicharAndFonts::Serialize(FILE *fp) const {
+  return tesseract::Serialize(fp, &unichar_id) && tesseract::Serialize(fp, font_ids);
+}
+
+// Reads from the given file. Returns false in case of error.
+bool UnicharAndFonts::DeSerialize(TFile *fp) {
+  return fp->DeSerialize(&unichar_id) && fp->DeSerialize(font_ids);
+}
+
+// Sort function to sort a pair of UnicharAndFonts by unichar_id.
+int UnicharAndFonts::SortByUnicharId(const void *v1, const void *v2) {
+  const auto *p1 = static_cast<const UnicharAndFonts *>(v1);
+  const auto *p2 = static_cast<const UnicharAndFonts *>(v2);
+  return p1->unichar_id - p2->unichar_id;
+}
+
+bool UnicharAndFonts::StdSortByUnicharId(const UnicharAndFonts &v1, const UnicharAndFonts &v2) {
+  return v1.unichar_id < v2.unichar_id;
+}
+
+// Writes to the given file. Returns false in case of error.
+bool Shape::Serialize(FILE *fp) const {
+  uint8_t sorted = unichars_sorted_;
+  return tesseract::Serialize(fp, &sorted) && tesseract::Serialize(fp, unichars_);
+}
+// Reads from the given file. Returns false in case of error.
+
+bool Shape::DeSerialize(TFile *fp) {
+  uint8_t sorted;
+  if (!fp->DeSerialize(&sorted)) {
+    return false;
+  }
+  unichars_sorted_ = sorted != 0;
+  return fp->DeSerialize(unichars_);
+}
+
+// Adds a font_id for the given unichar_id. If the unichar_id is not
+// in the shape, it is added.
+void Shape::AddToShape(int unichar_id, int font_id) {
+  for (auto &unichar : unichars_) {
+    if (unichar.unichar_id == unichar_id) {
+      // Found the unichar in the shape table.
+      std::vector<int> &font_list = unichar.font_ids;
+      for (int f : font_list) {
+        if (f == font_id) {
+          return; // Font is already there.
+        }
+      }
+      font_list.push_back(font_id);
+      return;
+    }
+  }
+  // Unichar_id is not in shape, so add it to shape.
+  unichars_.emplace_back(unichar_id, font_id);
+  unichars_sorted_ = unichars_.size() <= 1;
+}
+
+// Adds everything in other to this.
+void Shape::AddShape(const Shape &other) {
+  for (const auto &unichar : other.unichars_) {
+    for (unsigned f = 0; f < unichar.font_ids.size(); ++f) {
+      AddToShape(unichar.unichar_id, unichar.font_ids[f]);
+    }
+  }
+  unichars_sorted_ = unichars_.size() <= 1;
+}
+
+// Returns true if the shape contains the given unichar_id, font_id pair.
+bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const {
+  for (const auto &unichar : unichars_) {
+    if (unichar.unichar_id == unichar_id) {
+      // Found the unichar, so look for the font.
+      auto &font_list = unichar.font_ids;
+      for (int f : font_list) {
+        if (f == font_id) {
+          return true;
+        }
+      }
+      return false;
+    }
+  }
+  return false;
+}
+
+// Returns true if the shape contains the given unichar_id, ignoring font.
+bool Shape::ContainsUnichar(int unichar_id) const {
+  for (const auto &unichar : unichars_) {
+    if (unichar.unichar_id == unichar_id) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if the shape contains the given font, ignoring unichar_id.
+bool Shape::ContainsFont(int font_id) const {
+  for (const auto &unichar : unichars_) {
+    auto &font_list = unichar.font_ids;
+    for (int f : font_list) {
+      if (f == font_id) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+// Returns true if the shape contains the given font properties, ignoring
+// unichar_id.
+bool Shape::ContainsFontProperties(const FontInfoTable &font_table, uint32_t properties) const {
+  for (const auto &unichar : unichars_) {
+    auto &font_list = unichar.font_ids;
+    for (int f : font_list) {
+      if (font_table.at(f).properties == properties) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+// Returns true if the shape contains multiple different font properties,
+// ignoring unichar_id.
+bool Shape::ContainsMultipleFontProperties(const FontInfoTable &font_table) const {
+  uint32_t properties = font_table.at(unichars_[0].font_ids[0]).properties;
+  for (const auto &unichar : unichars_) {
+    auto &font_list = unichar.font_ids;
+    for (int f : font_list) {
+      if (font_table.at(f).properties != properties) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Returns true if this shape is equal to other (ignoring order of unichars
+// and fonts).
+bool Shape::operator==(const Shape &other) const {
+  return IsSubsetOf(other) && other.IsSubsetOf(*this);
+}
+
+// Returns true if this is a subset (including equal) of other.
+bool Shape::IsSubsetOf(const Shape &other) const {
+  for (const auto &unichar : unichars_) {
+    int unichar_id = unichar.unichar_id;
+    const std::vector<int> &font_list = unichar.font_ids;
+    for (int f : font_list) {
+      if (!other.ContainsUnicharAndFont(unichar_id, f)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Returns true if the lists of unichar ids are the same in this and other,
+// ignoring fonts.
+// NOT const, as it will sort the unichars on demand.
+bool Shape::IsEqualUnichars(Shape *other) {
+  if (unichars_.size() != other->unichars_.size()) {
+    return false;
+  }
+  if (!unichars_sorted_) {
+    SortUnichars();
+  }
+  if (!other->unichars_sorted_) {
+    other->SortUnichars();
+  }
+  for (unsigned c = 0; c < unichars_.size(); ++c) {
+    if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Sorts the unichars_ vector by unichar.
+void Shape::SortUnichars() {
+  std::sort(unichars_.begin(), unichars_.end(), UnicharAndFonts::StdSortByUnicharId);
+  unichars_sorted_ = true;
+}
+
+ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) {}
+ShapeTable::ShapeTable(const UNICHARSET &unicharset) : unicharset_(&unicharset), num_fonts_(0) {}
+
+// Writes to the given file. Returns false in case of error.
+bool ShapeTable::Serialize(FILE *fp) const {
+  return tesseract::Serialize(fp, shape_table_);
+}
+// Reads from the given file. Returns false in case of error.
+
+bool ShapeTable::DeSerialize(TFile *fp) {
+  if (!fp->DeSerialize(shape_table_)) {
+    return false;
+  }
+  num_fonts_ = 0;
+  return true;
+}
+
+// Returns the number of fonts used in this ShapeTable, computing it if
+// necessary.
+int ShapeTable::NumFonts() const {
+  if (num_fonts_ <= 0) {
+    for (auto shape_id : shape_table_) {
+      const Shape &shape = *shape_id;
+      for (int c = 0; c < shape.size(); ++c) {
+        for (int font_id : shape[c].font_ids) {
+          if (font_id >= num_fonts_) {
+            num_fonts_ = font_id + 1;
+          }
+        }
+      }
+    }
+  }
+  return num_fonts_;
+}
+
+// Re-indexes the class_ids in the shapetable according to the given map.
+// Useful in conjunction with set_unicharset.
+void ShapeTable::ReMapClassIds(const std::vector<int> &unicharset_map) {
+  for (auto shape : shape_table_) {
+    for (int c = 0; c < shape->size(); ++c) {
+      shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
+    }
+  }
+}
+
+// Returns a string listing the classes/fonts in a shape.
+std::string ShapeTable::DebugStr(unsigned shape_id) const {
+  if (shape_id >= shape_table_.size()) {
+    return "INVALID_UNICHAR_ID";
+  }
+  const Shape &shape = GetShape(shape_id);
+  std::string result;
+  result += "Shape" + std::to_string(shape_id);
+  if (shape.size() > 100) {
+    result += " Num unichars=" + std::to_string(shape.size());
+    return result;
+  }
+  for (int c = 0; c < shape.size(); ++c) {
+    result += " c_id=" + std::to_string(shape[c].unichar_id);
+    result += "=";
+    result += unicharset_->id_to_unichar(shape[c].unichar_id);
+    if (shape.size() < 10) {
+      result += ", " + std::to_string(shape[c].font_ids.size());
+      result += " fonts =";
+      int num_fonts = shape[c].font_ids.size();
+      if (num_fonts > 10) {
+        result += " " + std::to_string(shape[c].font_ids[0]);
+        result += " ... " + std::to_string(shape[c].font_ids[num_fonts - 1]);
+      } else {
+        for (int f = 0; f < num_fonts; ++f) {
+          result += " " + std::to_string(shape[c].font_ids[f]);
+        }
+      }
+    }
+  }
+  return result;
+}
+
+// Returns a debug string summarizing the table.
+std::string ShapeTable::SummaryStr() const {
+  int max_unichars = 0;
+  int num_multi_shapes = 0;
+  int num_master_shapes = 0;
+  for (unsigned s = 0; s < shape_table_.size(); ++s) {
+    if (MasterDestinationIndex(s) != s) {
+      continue;
+    }
+    ++num_master_shapes;
+    int shape_size = GetShape(s).size();
+    if (shape_size > 1) {
+      ++num_multi_shapes;
+    }
+    if (shape_size > max_unichars) {
+      max_unichars = shape_size;
+    }
+  }
+  std::string result;
+  result += "Number of shapes = " + std::to_string(num_master_shapes);
+  result += " max unichars = " + std::to_string(max_unichars);
+  result += " number with multiple unichars = " + std::to_string(num_multi_shapes);
+  return result;
+}
+
+// Adds a new shape starting with the given unichar_id and font_id.
+// Returns the assigned index.
+unsigned ShapeTable::AddShape(int unichar_id, int font_id) {
+  auto index = shape_table_.size();
+  auto *shape = new Shape;
+  shape->AddToShape(unichar_id, font_id);
+  shape_table_.push_back(shape);
+  num_fonts_ = std::max(num_fonts_, font_id + 1);
+  return index;
+}
+
+// Adds a copy of the given shape unless it is already present.
+// Returns the assigned index or index of existing shape if already present.
+unsigned ShapeTable::AddShape(const Shape &other) {
+  unsigned index;
+  for (index = 0; index < shape_table_.size() && !(other == *shape_table_[index]); ++index) {
+    continue;
+  }
+  if (index == shape_table_.size()) {
+    auto *shape = new Shape(other);
+    shape_table_.push_back(shape);
+  }
+  num_fonts_ = 0;
+  return index;
+}
+
+// Removes the shape given by the shape index.
+void ShapeTable::DeleteShape(unsigned shape_id) {
+  delete shape_table_[shape_id];
+  shape_table_.erase(shape_table_.begin() + shape_id);
+}
+
+// Adds a font_id to the given existing shape index for the given
+// unichar_id. If the unichar_id is not in the shape, it is added.
+void ShapeTable::AddToShape(unsigned shape_id, int unichar_id, int font_id) {
+  Shape &shape = *shape_table_[shape_id];
+  shape.AddToShape(unichar_id, font_id);
+  num_fonts_ = std::max(num_fonts_, font_id + 1);
+}
+
+// Adds the given shape to the existing shape with the given index.
+void ShapeTable::AddShapeToShape(unsigned shape_id, const Shape &other) {
+  Shape &shape = *shape_table_[shape_id];
+  shape.AddShape(other);
+  num_fonts_ = 0;
+}
+
+// Returns the id of the shape that contains the given unichar and font.
+// If not found, returns -1.
+// If font_id < 0, the font_id is ignored and the first shape that matches
+// the unichar_id is returned.
+int ShapeTable::FindShape(int unichar_id, int font_id) const {
+  for (unsigned s = 0; s < shape_table_.size(); ++s) {
+    const Shape &shape = GetShape(s);
+    for (int c = 0; c < shape.size(); ++c) {
+      if (shape[c].unichar_id == unichar_id) {
+        if (font_id < 0) {
+          return s; // We don't care about the font.
+        }
+        for (int f : shape[c].font_ids) {
+          if (f == font_id) {
+            return s;
+          }
+        }
+      }
+    }
+  }
+  return -1;
+}
+
+// Returns the first unichar_id and font_id in the given shape.
+void ShapeTable::GetFirstUnicharAndFont(unsigned shape_id, int *unichar_id, int *font_id) const {
+  const UnicharAndFonts &unichar_and_fonts = (*shape_table_[shape_id])[0];
+  *unichar_id = unichar_and_fonts.unichar_id;
+  *font_id = unichar_and_fonts.font_ids[0];
+}
+
+// Expands all the classes/fonts in the shape individually to build
+// a ShapeTable.
+int ShapeTable::BuildFromShape(const Shape &shape, const ShapeTable &master_shapes) {
+  BitVector shape_map(master_shapes.NumShapes());
+  for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
+    for (unsigned f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) {
+      int c = shape[u_ind].unichar_id;
+      int f = shape[u_ind].font_ids[f_ind];
+      int master_id = master_shapes.FindShape(c, f);
+      if (master_id >= 0) {
+        shape_map.SetBit(master_id);
+      } else if (FindShape(c, f) < 0) {
+        AddShape(c, f);
+      }
+    }
+  }
+  int num_masters = 0;
+  for (unsigned s = 0; s < master_shapes.NumShapes(); ++s) {
+    if (shape_map[s]) {
+      AddShape(master_shapes.GetShape(s));
+      ++num_masters;
+    }
+  }
+  return num_masters;
+}
+
+// Returns true if the shapes are already merged.
+bool ShapeTable::AlreadyMerged(unsigned shape_id1, unsigned shape_id2) const {
+  return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
+}
+
+// Returns true if any shape contains multiple unichars.
+bool ShapeTable::AnyMultipleUnichars() const {
+  auto num_shapes = NumShapes();
+  for (unsigned s1 = 0; s1 < num_shapes; ++s1) {
+    if (MasterDestinationIndex(s1) != s1) {
+      continue;
+    }
+    if (GetShape(s1).size() > 1) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns the maximum number of unichars over all shapes.
+int ShapeTable::MaxNumUnichars() const {
+  int max_num_unichars = 0;
+  int num_shapes = NumShapes();
+  for (int s = 0; s < num_shapes; ++s) {
+    if (GetShape(s).size() > max_num_unichars) {
+      max_num_unichars = GetShape(s).size();
+    }
+  }
+  return max_num_unichars;
+}
+
+// Merges shapes with a common unichar over the [start, end) interval.
+// Assumes single unichar per shape.
+void ShapeTable::ForceFontMerges(unsigned start, unsigned end) {
+  for (unsigned s1 = start; s1 < end; ++s1) {
+    if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) {
+      int unichar_id = GetShape(s1)[0].unichar_id;
+      for (auto s2 = s1 + 1; s2 < end; ++s2) {
+        if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 &&
+            unichar_id == GetShape(s2)[0].unichar_id) {
+          MergeShapes(s1, s2);
+        }
+      }
+    }
+  }
+  ShapeTable compacted(*unicharset_);
+  compacted.AppendMasterShapes(*this, nullptr);
+  *this = compacted;
+}
+
+// Returns the number of unichars in the master shape.
+unsigned ShapeTable::MasterUnicharCount(unsigned shape_id) const {
+  int master_id = MasterDestinationIndex(shape_id);
+  return GetShape(master_id).size();
+}
+
+// Returns the sum of the font counts in the master shape.
+int ShapeTable::MasterFontCount(unsigned shape_id) const {
+  int master_id = MasterDestinationIndex(shape_id);
+  const Shape &shape = GetShape(master_id);
+  int font_count = 0;
+  for (int c = 0; c < shape.size(); ++c) {
+    font_count += shape[c].font_ids.size();
+  }
+  return font_count;
+}
+
+// Returns the number of unichars that would result from merging the shapes.
+int ShapeTable::MergedUnicharCount(unsigned shape_id1, unsigned shape_id2) const {
+  // Do it the easy way for now.
+  int master_id1 = MasterDestinationIndex(shape_id1);
+  int master_id2 = MasterDestinationIndex(shape_id2);
+  Shape combined_shape(*shape_table_[master_id1]);
+  combined_shape.AddShape(*shape_table_[master_id2]);
+  return combined_shape.size();
+}
+
+// Merges two shape_ids, leaving shape_id2 marked as merged.
+void ShapeTable::MergeShapes(unsigned shape_id1, unsigned shape_id2) {
+  auto master_id1 = MasterDestinationIndex(shape_id1);
+  auto master_id2 = MasterDestinationIndex(shape_id2);
+  // Point master_id2 (and all merged shapes) to master_id1.
+  shape_table_[master_id2]->set_destination_index(master_id1);
+  // Add all the shapes of master_id2 to master_id1.
+  shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
+}
+
+// Swaps two shape_ids.
+void ShapeTable::SwapShapes(unsigned shape_id1, unsigned shape_id2) {
+  Shape *tmp = shape_table_[shape_id1];
+  shape_table_[shape_id1] = shape_table_[shape_id2];
+  shape_table_[shape_id2] = tmp;
+}
+
+// Returns the destination of this shape, (if merged), taking into account
+// the fact that the destination may itself have been merged.
+unsigned ShapeTable::MasterDestinationIndex(unsigned shape_id) const {
+  auto dest_id = shape_table_[shape_id]->destination_index();
+  if (static_cast<unsigned>(dest_id) == shape_id || dest_id < 0) {
+    return shape_id; // Is master already.
+  }
+  auto master_id = shape_table_[dest_id]->destination_index();
+  if (master_id == dest_id || master_id < 0) {
+    return dest_id; // Dest is the master and shape_id points to it.
+  }
+  master_id = MasterDestinationIndex(master_id);
+  return master_id;
+}
+
+// Returns false if the unichars in neither shape is a subset of the other.
+bool ShapeTable::SubsetUnichar(unsigned shape_id1, unsigned shape_id2) const {
+  const Shape &shape1 = GetShape(shape_id1);
+  const Shape &shape2 = GetShape(shape_id2);
+  int c1, c2;
+  for (c1 = 0; c1 < shape1.size(); ++c1) {
+    int unichar_id1 = shape1[c1].unichar_id;
+    if (!shape2.ContainsUnichar(unichar_id1)) {
+      break;
+    }
+  }
+  for (c2 = 0; c2 < shape2.size(); ++c2) {
+    int unichar_id2 = shape2[c2].unichar_id;
+    if (!shape1.ContainsUnichar(unichar_id2)) {
+      break;
+    }
+  }
+  return c1 == shape1.size() || c2 == shape2.size();
+}
+
+// Returns false if the unichars in neither shape is a subset of the other.
+bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, unsigned shape_id) const {
+  const Shape &merge1 = GetShape(merge_id1);
+  const Shape &merge2 = GetShape(merge_id2);
+  const Shape &shape = GetShape(shape_id);
+  int cm1, cm2, cs;
+  for (cs = 0; cs < shape.size(); ++cs) {
+    int unichar_id = shape[cs].unichar_id;
+    if (!merge1.ContainsUnichar(unichar_id) && !merge2.ContainsUnichar(unichar_id)) {
+      break; // Shape is not a subset of the merge.
+    }
+  }
+  for (cm1 = 0; cm1 < merge1.size(); ++cm1) {
+    int unichar_id1 = merge1[cm1].unichar_id;
+    if (!shape.ContainsUnichar(unichar_id1)) {
+      break; // Merge is not a subset of shape
+    }
+  }
+  for (cm2 = 0; cm2 < merge2.size(); ++cm2) {
+    int unichar_id2 = merge2[cm2].unichar_id;
+    if (!shape.ContainsUnichar(unichar_id2)) {
+      break; // Merge is not a subset of shape
+    }
+  }
+  return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size());
+}
+
+// Returns true if the unichar sets are equal between the shapes.
+bool ShapeTable::EqualUnichars(unsigned shape_id1, unsigned shape_id2) const {
+  const Shape &shape1 = GetShape(shape_id1);
+  const Shape &shape2 = GetShape(shape_id2);
+  for (int c1 = 0; c1 < shape1.size(); ++c1) {
+    int unichar_id1 = shape1[c1].unichar_id;
+    if (!shape2.ContainsUnichar(unichar_id1)) {
+      return false;
+    }
+  }
+  for (int c2 = 0; c2 < shape2.size(); ++c2) {
+    int unichar_id2 = shape2[c2].unichar_id;
+    if (!shape1.ContainsUnichar(unichar_id2)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Returns true if the unichar sets are equal between the shapes.
+bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, unsigned shape_id) const {
+  const Shape &merge1 = GetShape(merge_id1);
+  const Shape &merge2 = GetShape(merge_id2);
+  const Shape &shape = GetShape(shape_id);
+  for (int cs = 0; cs < shape.size(); ++cs) {
+    int unichar_id = shape[cs].unichar_id;
+    if (!merge1.ContainsUnichar(unichar_id) && !merge2.ContainsUnichar(unichar_id)) {
+      return false; // Shape has a unichar that appears in neither merge.
+    }
+  }
+  for (int cm1 = 0; cm1 < merge1.size(); ++cm1) {
+    int unichar_id1 = merge1[cm1].unichar_id;
+    if (!shape.ContainsUnichar(unichar_id1)) {
+      return false; // Merge has a unichar that is not in shape.
+    }
+  }
+  for (int cm2 = 0; cm2 < merge2.size(); ++cm2) {
+    int unichar_id2 = merge2[cm2].unichar_id;
+    if (!shape.ContainsUnichar(unichar_id2)) {
+      return false; // Merge has a unichar that is not in shape.
+    }
+  }
+  return true;
+}
+
+// Returns true if there is a common unichar between the shapes.
+bool ShapeTable::CommonUnichars(unsigned shape_id1, unsigned shape_id2) const {
+  const Shape &shape1 = GetShape(shape_id1);
+  const Shape &shape2 = GetShape(shape_id2);
+  for (int c1 = 0; c1 < shape1.size(); ++c1) {
+    int unichar_id1 = shape1[c1].unichar_id;
+    if (shape2.ContainsUnichar(unichar_id1)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if there is a common font id between the shapes.
+bool ShapeTable::CommonFont(unsigned shape_id1, unsigned shape_id2) const {
+  const Shape &shape1 = GetShape(shape_id1);
+  const Shape &shape2 = GetShape(shape_id2);
+  for (int c1 = 0; c1 < shape1.size(); ++c1) {
+    const std::vector<int> &font_list1 = shape1[c1].font_ids;
+    for (int f : font_list1) {
+      if (shape2.ContainsFont(f)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Appends the master shapes from other to this.
+// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
+void ShapeTable::AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map) {
+  if (shape_map != nullptr) {
+    shape_map->clear();
+    shape_map->resize(other.NumShapes(), -1);
+  }
+  for (unsigned s = 0; s < other.shape_table_.size(); ++s) {
+    if (other.shape_table_[s]->destination_index() < 0) {
+      int index = AddShape(*other.shape_table_[s]);
+      if (shape_map != nullptr) {
+        (*shape_map)[s] = index;
+      }
+    }
+  }
+}
+
+// Returns the number of master shapes remaining after merging.
+int ShapeTable::NumMasterShapes() const {
+  int num_shapes = 0;
+  for (auto s : shape_table_) {
+    if (s->destination_index() < 0) {
+      ++num_shapes;
+    }
+  }
+  return num_shapes;
+}
+
+// Adds the unichars of the given shape_id to the vector of results. Any
+// unichar_id that is already present just has the fonts added to the
+// font set for that result without adding a new entry in the vector.
+// NOTE: it is assumed that the results are given to this function in order
+// of decreasing rating.
+// The unichar_map vector indicates the index of the results entry containing
+// each unichar, or -1 if the unichar is not yet included in results.
+void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, std::vector<int> *unichar_map,
+                                   std::vector<UnicharRating> *results) const {
+  if (shape_rating.joined) {
+    AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, results);
+  }
+  if (shape_rating.broken) {
+    AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map, results);
+  }
+  const Shape &shape = GetShape(shape_rating.shape_id);
+  for (int u = 0; u < shape.size(); ++u) {
+    int result_index =
+        AddUnicharToResults(shape[u].unichar_id, shape_rating.rating, unichar_map, results);
+    for (int font_id : shape[u].font_ids) {
+      (*results)[result_index].fonts.emplace_back(font_id,
+                                                  IntCastRounded(shape_rating.rating * INT16_MAX));
+    }
+  }
+}
+
+// Adds the given unichar_id to the results if needed, updating unichar_map
+// and returning the index of unichar in results.
+int ShapeTable::AddUnicharToResults(int unichar_id, float rating, std::vector<int> *unichar_map,
+                                    std::vector<UnicharRating> *results) const {
+  int result_index = unichar_map->at(unichar_id);
+  if (result_index < 0) {
+    UnicharRating result(unichar_id, rating);
+    result_index = results->size();
+    results->push_back(result);
+    (*unichar_map)[unichar_id] = result_index;
+  }
+  return result_index;
+}
+
+} // namespace tesseract