diff mupdf-source/thirdparty/tesseract/src/textord/workingpartset.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/textord/workingpartset.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,87 @@
+///////////////////////////////////////////////////////////////////////
+// File:        workingpartset.h
+// Description: Class to hold a working set of partitions of the page
+//              during construction of text/image regions.
+// Author:      Ray Smith
+// Created:     Tue Ocr 28 17:21:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_WORKINGPARSET_H_
+#define TESSERACT_TEXTORD_WORKINGPARSET_H_
+
+#include "blobbox.h"      // For TO_BLOCK_LIST and BLOCK_LIST.
+#include "colpartition.h" // For ColPartition_LIST.
+
+namespace tesseract {
+
+// WorkingPartSet holds a working set of ColPartitions during transformation
+// from the grid-based storage to regions in logical reading order, and is
+// therefore only used during construction of the regions.
+class WorkingPartSet : public ELIST_LINK {
+public:
+  explicit WorkingPartSet(ColPartition *column)
+      : column_(column), latest_part_(nullptr), part_it_(&part_set_) {}
+
+  // Simple accessors.
+  ColPartition *column() const {
+    return column_;
+  }
+  void set_column(ColPartition *col) {
+    column_ = col;
+  }
+
+  // Add the partition to this WorkingPartSet. Partitions are generally
+  // stored in the order in which they are received, but if the partition
+  // has a SingletonPartner, make sure that it stays with its partner.
+  void AddPartition(ColPartition *part);
+
+  // Make blocks out of any partitions in this WorkingPartSet, and append
+  // them to the end of the blocks list. bleft, tright and resolution give
+  // the bounds and resolution of the source image, so that blocks can be
+  // made to fit in the bounds.
+  // All ColPartitions go in the used_parts list, as they need to be kept
+  // around, but are no longer needed.
+  void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution,
+                              ColPartition_LIST *used_parts, BLOCK_LIST *blocks,
+                              TO_BLOCK_LIST *to_blocks);
+
+  // Insert the given blocks at the front of the completed_blocks_ list so
+  // they can be kept in the correct reading order.
+  void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
+
+private:
+  // Convert the part_set_ into blocks, starting a new block at a break
+  // in partnerships, or a change in linespacing (for text).
+  void MakeBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution,
+                  ColPartition_LIST *used_parts);
+
+  // The column that this working set applies to. Used by the caller.
+  ColPartition *column_;
+  // The most recently added partition.
+  ColPartition *latest_part_;
+  // All the partitions in the block that is currently under construction.
+  ColPartition_LIST part_set_;
+  // Iteratorn on part_set_ pointing to the most recent addition.
+  ColPartition_IT part_it_;
+  // The blocks that have been made so far and belong before the current block.
+  BLOCK_LIST completed_blocks_;
+  TO_BLOCK_LIST to_blocks_;
+};
+
+ELISTIZEH(WorkingPartSet)
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_WORKINGPARSET_H_