comparison mupdf-source/thirdparty/tesseract/src/ccstruct/boxword.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: boxword.h
3 // Description: Class to represent the bounding boxes of the output.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #ifndef TESSERACT_CSTRUCT_BOXWORD_H_
20 #define TESSERACT_CSTRUCT_BOXWORD_H_
21
22 #include "rect.h" // for TBOX
23
24 #include <functional> // for std::function
25
26 namespace tesseract {
27
28 class BLOCK;
29 class WERD;
30 struct TWERD;
31
32 // Class to hold an array of bounding boxes for an output word and
33 // the bounding box of the whole word.
34 class BoxWord {
35 public:
36 BoxWord();
37 explicit BoxWord(const BoxWord &src);
38 ~BoxWord() = default;
39
40 BoxWord &operator=(const BoxWord &src);
41
42 void CopyFrom(const BoxWord &src);
43
44 // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
45 // switch back to original image coordinates.
46 static BoxWord *CopyFromNormalized(TWERD *tessword);
47
48 // Clean up the bounding boxes from the polygonal approximation by
49 // expanding slightly, then clipping to the blobs from the original_word
50 // that overlap. If not null, the block provides the inverse rotation.
51 void ClipToOriginalWord(const BLOCK *block, WERD *original_word);
52
53 // Merges the boxes from start to end, not including end, and deletes
54 // the boxes between start and end.
55 void MergeBoxes(unsigned start, unsigned end);
56
57 // Inserts a new box before the given index.
58 // Recomputes the bounding box.
59 void InsertBox(unsigned index, const TBOX &box);
60
61 // Changes the box at the given index to the new box.
62 // Recomputes the bounding box.
63 void ChangeBox(unsigned index, const TBOX &box);
64
65 // Deletes the box with the given index, and shuffles up the rest.
66 // Recomputes the bounding box.
67 void DeleteBox(unsigned index);
68
69 // Deletes all the boxes stored in BoxWord.
70 void DeleteAllBoxes();
71
72 // This and other putatively are the same, so call the (permanent) callback
73 // for each blob index where the bounding boxes match.
74 // The callback is deleted on completion.
75 void ProcessMatchedBlobs(const TWERD &other,
76 const std::function<void(int)> &cb) const;
77
78 const TBOX &bounding_box() const {
79 return bbox_;
80 }
81 unsigned length() const {
82 return length_;
83 }
84 const TBOX &BlobBox(unsigned index) const {
85 return boxes_[index];
86 }
87
88 private:
89 void ComputeBoundingBox();
90
91 TBOX bbox_;
92 unsigned length_;
93 std::vector<TBOX> boxes_;
94 };
95
96 } // namespace tesseract.
97
98 #endif // TESSERACT_CSTRUCT_BOXWORD_H_