annotate mupdf-source/thirdparty/tesseract/src/textord/tablerecog.h @ 17:dd9cdb856310

Remove PKG-INFO from the because it is regenerated automatically for the sdist
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 18 Sep 2025 17:40:40 +0200
parents b50eed0cc0ef
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
1 ///////////////////////////////////////////////////////////////////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
2 // File: tablerecog.h
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
3 // Description: Functions to detect structure of tables.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
4 // Author: Nicholas Beato
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
5 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
6 // (C) Copyright 2010, Google Inc.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
7 // Licensed under the Apache License, Version 2.0 (the "License");
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
8 // you may not use this file except in compliance with the License.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
9 // You may obtain a copy of the License at
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
10 // http://www.apache.org/licenses/LICENSE-2.0
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
11 // Unless required by applicable law or agreed to in writing, software
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
12 // distributed under the License is distributed on an "AS IS" BASIS,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
14 // See the License for the specific language governing permissions and
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
15 // limitations under the License.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
16 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
17 ///////////////////////////////////////////////////////////////////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
18
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
19 #ifndef TABLERECOG_H_
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
20 #define TABLERECOG_H_
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
21
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
22 #include "colpartitiongrid.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
23
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
24 namespace tesseract {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
25
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
26 // There are 2 classes in this file. They have 2 different purposes.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
27 // - StructuredTable contains the methods to find the structure given
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
28 // a specific bounding box and grow that structure.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
29 // - TableRecognizer contains the methods to adjust the possible positions
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
30 // of a table without worrying about structure.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
31 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
32 // To use these classes, the assumption is that the TableFinder will
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
33 // have a guess of the location of a table (or possibly over/undersegmented
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
34 // tables). The TableRecognizer is responsible for finding the table boundaries
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
35 // at a high level. The StructuredTable class is responsible for determining
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
36 // the structure of the table and trying to maximize its bounds while retaining
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
37 // the structure.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
38 // (The latter part is not implemented yet, but that was the goal).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
39 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
40 // While on the boundary discussion, keep in mind that this is a first pass.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
41 // There should eventually be some things like internal structure checks,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
42 // and, more importantly, surrounding text flow checks.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
43 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
44
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
45 // Usage:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
46 // The StructuredTable class contains methods to query a potential table.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
47 // It has functions to find structure, count rows, find ColPartitions that
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
48 // intersect gridlines, etc. It is not meant to blindly find a table. It
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
49 // is meant to start with a known table location and enhance it.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
50 // Usage:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
51 // ColPartitionGrid text_grid, line_grid; // init
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
52 // TBOX table_box; // known location of table location
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
53 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
54 // StructuredTable table;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
55 // table.Init(); // construction code
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
56 // table.set_text_grid(/* text */); // These 2 grids can be the same!
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
57 // table.set_line_grid(/* lines */);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
58 // table.set_min_text_height(10); // Filter vertical and tall text.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
59 // // IMPORTANT! The table needs to be told where it is!
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
60 // table.set_bounding_box(table_box); // Set initial table location.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
61 // if (table.FindWhitespacedStructure()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
62 // // process table
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
63 // table.column_count(); // number of columns
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
64 // table.row_count(); // number of rows
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
65 // table.cells_count(); // number of cells
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
66 // table.bounding_box(); // updated bounding box
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
67 // // etc.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
68 // }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
69 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
70 class TESS_API StructuredTable {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
71 public:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
72 StructuredTable();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
73 ~StructuredTable() = default;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
74
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
75 // Initialization code. Must be called after the constructor.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
76 void Init();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
77
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
78 // Sets the grids used by the table. These can be changed between
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
79 // calls to Recognize. They are treated as read-only data.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
80 void set_text_grid(ColPartitionGrid *text);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
81 void set_line_grid(ColPartitionGrid *lines);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
82 // Filters text partitions that are ridiculously tall to prevent
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
83 // merging rows.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
84 void set_max_text_height(int height);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
85
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
86 // Basic accessors. Some are treated as attributes despite having indirect
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
87 // representation.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
88 bool is_lined() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
89 unsigned row_count() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
90 unsigned column_count() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
91 unsigned cell_count() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
92 void set_bounding_box(const TBOX &box);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
93 const TBOX &bounding_box() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
94 int median_cell_height();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
95 int median_cell_width();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
96 int row_height(unsigned row) const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
97 int column_width(unsigned column) const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
98 int space_above() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
99 int space_below() const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
100
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
101 // Given enough horizontal and vertical lines in a region, create this table
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
102 // based on the structure given by the lines. Return true if it worked out.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
103 // Code assumes the lines exist. It is the caller's responsibility to check
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
104 // for lines and find an appropriate bounding box.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
105 bool FindLinedStructure();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
106
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
107 // The main subroutine for finding generic table structure. The function
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
108 // finds the grid structure in the given box. Returns true if a good grid
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
109 // exists, implying that "this" table is valid.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
110 bool FindWhitespacedStructure();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
111
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
112 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
113 //////// Functions to query table info.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
114 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
115
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
116 // Returns true if inserting part into the table does not cause any
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
117 // cell merges.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
118 bool DoesPartitionFit(const ColPartition &part) const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
119 // Checks if a sub-table has multiple data cells filled.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
120 int CountFilledCells();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
121 int CountFilledCellsInRow(int row);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
122 int CountFilledCellsInColumn(int column);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
123 int CountFilledCells(unsigned row_start, unsigned row_end, unsigned column_start, unsigned column_end);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
124
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
125 // Makes sure that at least one cell in a row has substantial area filled.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
126 // This can filter out large whitespace caused by growing tables too far
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
127 // and page numbers.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
128 // (currently bugged for some reason).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
129 bool VerifyRowFilled(int row);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
130 // Finds the filled area in a cell.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
131 double CalculateCellFilledPercentage(unsigned row, unsigned column);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
132
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
133 // Debug display, draws the table in the given color. If the table is not
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
134 // valid, the table and "best" grid lines are still drawn in the given color.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
135 void Display(ScrollView *window, ScrollView::Color color);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
136
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
137 protected:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
138 // Clear the structure information.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
139 void ClearStructure();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
140
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
141 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
142 //////// Lined tables
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
143 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
144
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
145 // Verifies the lines do not intersect partitions. This happens when
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
146 // the lines are in column boundaries and extend the full page. As a result,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
147 // the grid lines go through column text. The condition is detectable.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
148 bool VerifyLinedTableCells();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
149
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
150 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
151 //////// Tables with whitespace
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
152 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
153
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
154 // This is the function to change if you want to filter resulting tables
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
155 // better. Right now it just checks for a minimum cell count and such.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
156 // You could add things like maximum number of ColPartitions per cell or
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
157 // similar.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
158 bool VerifyWhitespacedTable();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
159 // Find the columns of a table using whitespace.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
160 void FindWhitespacedColumns();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
161 // Find the rows of a table using whitespace.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
162 void FindWhitespacedRows();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
163
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
164 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
165 //////// Functions to provide information about the table.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
166 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
167
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
168 // Calculates the whitespace around the table using the table boundary and
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
169 // the supplied grids (set_text_grid and set_line_grid).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
170 void CalculateMargins();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
171 // Update the table margins with the supplied grid. This is
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
172 // only called by calculate margins to use multiple grid sources.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
173 void UpdateMargins(ColPartitionGrid *grid);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
174 int FindVerticalMargin(ColPartitionGrid *grid, int start_x, bool decrease) const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
175 int FindHorizontalMargin(ColPartitionGrid *grid, int start_y, bool decrease) const;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
176 // Calculates stats on the table, namely the median cell height and width.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
177 void CalculateStats();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
178
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
179 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
180 //////// Functions to try to "fix" some table errors.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
181 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
182
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
183 // Given a whitespaced table, this looks for bordering lines that might
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
184 // be page layout boxes around the table. It is necessary to get the margins
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
185 // correct on the table. If the lines are not joined, the margins will be
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
186 // the distance to the line, which is not right.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
187 void AbsorbNearbyLines();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
188
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
189 // Nice utility function for finding partition gaps. You feed it a sorted
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
190 // list of all of the mins/maxes of the partitions in the table, and it gives
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
191 // you the gaps (middle). This works for both vertical and horizontal
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
192 // gaps.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
193 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
194 // If you want to allow slight overlap in the division and the partitions,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
195 // just scale down the partitions before inserting them in the list.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
196 // Likewise, you can force at least some space between partitions.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
197 // This trick is how the horizontal partitions are done (since the page
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
198 // skew could make it hard to find splits in the text).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
199 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
200 // As a result, "0 distance" between closest partitions causes a gap.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
201 // This is not a programmatic assumption. It is intentional and simplifies
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
202 // things.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
203 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
204 // "max_merged" indicates both the minimum number of stacked partitions
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
205 // to cause a cell (add 1 to it), and the maximum number of partitions that
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
206 // a grid line can intersect. For example, if max_merged is 0, then lines
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
207 // are inserted wherever space exists between partitions. If it is 2,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
208 // lines may intersect 2 partitions at most, but you also need at least
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
209 // 2 partitions to generate a line.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
210 static void FindCellSplitLocations(const std::vector<int> &min_list,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
211 const std::vector<int> &max_list, int max_merged,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
212 std::vector<int> *locations);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
213
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
214 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
215 //////// Utility function for table queries
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
216 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
217
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
218 // Counts the number of ColPartitions that intersect vertical cell
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
219 // division at this x value. Used by VerifyLinedTable.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
220 int CountVerticalIntersections(int x);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
221 int CountHorizontalIntersections(int y);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
222
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
223 // Counts how many text partitions are in this box.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
224 int CountPartitions(const TBOX &box);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
225
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
226 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
227 //////// Data members.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
228 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
229
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
230 // Input data, used as read only data to make decisions.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
231 ColPartitionGrid *text_grid_; // Text ColPartitions
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
232 ColPartitionGrid *line_grid_; // Line ColPartitions
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
233 // Table structure.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
234 // bounding box is a convenient external representation.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
235 // cell_x_ and cell_y_ indicate the grid lines.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
236 TBOX bounding_box_; // Bounding box
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
237 std::vector<int> cell_x_; // Locations of vertical divisions (sorted)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
238 std::vector<int> cell_y_; // Locations of horizontal divisions (sorted)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
239 bool is_lined_; // Is the table backed up by a line structure
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
240 // Table margins, set via CalculateMargins
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
241 int space_above_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
242 int space_below_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
243 int space_left_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
244 int space_right_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
245 int median_cell_height_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
246 int median_cell_width_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
247 // Filters, used to prevent awkward partitions from destroying structure.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
248 int max_text_height_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
249 };
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
250
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
251 class TESS_API TableRecognizer {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
252 public:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
253 TableRecognizer() = default;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
254 ~TableRecognizer() = default;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
255
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
256 // Initialization code. Must be called after the constructor.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
257 void Init();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
258
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
259 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
260 //////// Pre-recognize methods to initial table constraints.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
261 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
262
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
263 // Sets the grids used by the table. These can be changed between
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
264 // calls to Recognize. They are treated as read-only data.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
265 void set_text_grid(ColPartitionGrid *text);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
266 void set_line_grid(ColPartitionGrid *lines);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
267 // Sets some additional constraints on the table.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
268 void set_min_height(int height);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
269 void set_min_width(int width);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
270 // Filters text partitions that are ridiculously tall to prevent
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
271 // merging rows. Note that "filters" refers to allowing horizontal
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
272 // cells to slice through them on the premise that they were
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
273 // merged text rows during previous layout.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
274 void set_max_text_height(int height);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
275
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
276 // Given a guess location, the RecognizeTable function will try to find a
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
277 // structured grid in the area. On success, it will return a new
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
278 // StructuredTable (and assumes you will delete it). Otherwise,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
279 // nullptr is returned.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
280 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
281 // Keep in mind, this may "overgrow" or "undergrow" the size of guess.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
282 // Ideally, there is either a one-to-one correspondence between
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
283 // the guess and table or no table at all. This is not the best of
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
284 // assumptions right now, but was made to try to keep things simple in
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
285 // the first pass.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
286 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
287 // If a line structure is available on the page in the given region,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
288 // the table will use the linear structure as it is.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
289 // Otherwise, it will try to maximize the whitespace around it while keeping
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
290 // a grid structure. This is somewhat working.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
291 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
292 // Since the combination of adjustments can get high, effort was
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
293 // originally made to keep the number of adjustments linear in the number
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
294 // of partitions. The underlying structure finding code used to be
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
295 // much more complex. I don't know how necessary this constraint is anymore.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
296 // The evaluation of a possible table is kept within O(nlogn) in the size of
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
297 // the table (where size is the number of partitions in the table).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
298 // As a result, the algorithm is capable of O(n^2 log n). Depending
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
299 // on the grid search size, it may be higher.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
300 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
301 // Last note: it is possible to just try all partition boundaries at a high
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
302 // level O(n^4) and do a verification scheme (at least O(nlogn)). If there
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
303 // area 200 partitions on a page, this could be too costly. Effort could go
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
304 // into pruning the search, but I opted for something quicker. I'm confident
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
305 // that the independent adjustments can get similar results and keep the
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
306 // complextiy down. However, the other approach could work without using
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
307 // TableFinder at all if it is fast enough. It comes down to properly
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
308 // deciding what is a table. The code currently relies on TableFinder's
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
309 // guess to the location of a table for that.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
310 StructuredTable *RecognizeTable(const TBOX &guess_box);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
311
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
312 protected:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
313 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
314 //////// Lined tables
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
315 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
316
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
317 // Returns true if the given box has a lined table within it. The
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
318 // table argument will be updated with the table if the table exists.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
319 bool RecognizeLinedTable(const TBOX &guess_box, StructuredTable *table);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
320 // Returns true if the given box has a large number of horizontal and
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
321 // vertical lines present. If so, we assume the extent of these lines
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
322 // uniquely defines a table and find that table via SolveLinedTable.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
323 bool HasSignificantLines(const TBOX &guess);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
324
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
325 // Given enough horizontal and vertical lines in a region, find a bounding
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
326 // box that encloses all of them (as well as newly introduced lines).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
327 // The bounding box is the smallest box that encloses the lines in guess
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
328 // without having any lines sticking out of it.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
329 // bounding_box is an in/out parameter.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
330 // On input, it in the extents of the box to search.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
331 // On output, it is the resulting bounding box.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
332 bool FindLinesBoundingBox(TBOX *bounding_box);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
333 // Iteration in above search.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
334 // bounding_box is an in/out parameter.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
335 // On input, it in the extents of the box to search.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
336 // On output, it is the resulting bounding box.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
337 bool FindLinesBoundingBoxIteration(TBOX *bounding_box);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
338
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
339 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
340 //////// Generic "whitespaced" tables
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
341 ////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
342
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
343 // Returns true if the given box has a whitespaced table within it. The
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
344 // table argument will be updated if the table exists. Also note
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
345 // that this method will fail if the guess_box center is not
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
346 // mostly within the table.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
347 bool RecognizeWhitespacedTable(const TBOX &guess_box, StructuredTable *table);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
348
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
349 // Finds the location of a horizontal split relative to y.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
350 // This function is mostly unused now. If the SolveWhitespacedTable
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
351 // changes much, it can be removed. Note, it isn't really as reliable
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
352 // as I thought. I went with alternatives for most of the other uses.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
353 int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
354
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
355 // Input data, used as read only data to make decisions.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
356 ColPartitionGrid *text_grid_ = nullptr; // Text ColPartitions
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
357 ColPartitionGrid *line_grid_ = nullptr; // Line ColPartitions
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
358 // Table constraints, a "good" table must satisfy these.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
359 int min_height_ = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
360 int min_width_ = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
361 // Filters, used to prevent awkward partitions from destroying structure.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
362 int max_text_height_ = INT32_MAX; // Horizontal lines may intersect taller text.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
363 };
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
364
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
365 } // namespace tesseract
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
366
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
367 #endif /* TABLERECOG_H_ */