diff mupdf-source/thirdparty/tesseract/src/ccstruct/boxword.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/boxword.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,98 @@
+///////////////////////////////////////////////////////////////////////
+// File:        boxword.h
+// Description: Class to represent the bounding boxes of the output.
+// Author:      Ray Smith
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
+#define TESSERACT_CSTRUCT_BOXWORD_H_
+
+#include "rect.h" // for TBOX
+
+#include <functional> // for std::function
+
+namespace tesseract {
+
+class BLOCK;
+class WERD;
+struct TWERD;
+
+// Holds the per-blob bounding boxes of one output word together with the
+// bounding box of the word as a whole.
+//
+// NOTE(review): length_ is stored separately from boxes_; it is presumably
+// kept equal to boxes_.size() by the out-of-line members - confirm in the
+// implementation file.
+class BoxWord {
+public:
+  BoxWord();
+  // Copy constructor. Because it is declared explicit it is only usable with
+  // direct-initialization, e.g. "BoxWord copy(src);". Copy-initialization
+  // ("BoxWord copy = src;") does not compile.
+  explicit BoxWord(const BoxWord &src);
+  ~BoxWord() = default;
+
+  BoxWord &operator=(const BoxWord &src);
+
+  // Copy helper defined out of line; presumably shared by the copy
+  // constructor and operator= - confirm in the implementation file.
+  void CopyFrom(const BoxWord &src);
+
+  // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
+  // switch back to original image coordinates.
+  // Returns a raw pointer to the new BoxWord; ownership presumably passes to
+  // the caller - confirm against the definition.
+  static BoxWord *CopyFromNormalized(TWERD *tessword);
+
+  // Clean up the bounding boxes from the polygonal approximation by
+  // expanding slightly, then clipping to the blobs from the original_word
+  // that overlap. If not null, the block provides the inverse rotation.
+  void ClipToOriginalWord(const BLOCK *block, WERD *original_word);
+
+  // Merges the boxes in the half-open index range [start, end) and deletes
+  // the boxes between start and end.
+  void MergeBoxes(unsigned start, unsigned end);
+
+  // Inserts a new box before the given index.
+  // Recomputes the bounding box.
+  void InsertBox(unsigned index, const TBOX &box);
+
+  // Changes the box at the given index to the new box.
+  // Recomputes the bounding box.
+  void ChangeBox(unsigned index, const TBOX &box);
+
+  // Deletes the box with the given index, and shuffles up the rest.
+  // Recomputes the bounding box.
+  void DeleteBox(unsigned index);
+
+  // Deletes all the boxes stored in BoxWord.
+  void DeleteAllBoxes();
+
+  // This and other putatively are the same, so call the callback
+  // for each blob index where the bounding boxes match.
+  // cb is a const reference to a std::function, so the caller keeps
+  // ownership of it; nothing is deleted on completion.
+  void ProcessMatchedBlobs(const TWERD &other,
+                           const std::function<void(int)> &cb) const;
+
+  // Returns the stored bounding box of the whole word.
+  const TBOX &bounding_box() const {
+    return bbox_;
+  }
+  // Returns the stored box count (length_).
+  unsigned length() const {
+    return length_;
+  }
+  // Returns the box at the given index. No bounds check is performed, so
+  // index must be a valid index into the stored boxes.
+  const TBOX &BlobBox(unsigned index) const {
+    return boxes_[index];
+  }
+
+private:
+  // Defined out of line; judging by the name it refreshes bbox_ from boxes_.
+  void ComputeBoundingBox();
+
+  // Bounding box of the whole word.
+  TBOX bbox_;
+  // Box count reported by length(). Defaulted to 0 so that no construction
+  // path can leave it indeterminate.
+  unsigned length_ = 0;
+  // Per-blob bounding boxes, indexed by BlobBox().
+  std::vector<TBOX> boxes_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CSTRUCT_BOXWORD_H_