comparison mupdf-source/thirdparty/tesseract/src/classify/tessclassifier.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: tessclassifier.cpp
3 // Description: Tesseract implementation of a ShapeClassifier.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2011, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #include "tessclassifier.h"
20
21 #include "classify.h"
22 #include "trainingsample.h"
23
24 namespace tesseract {
25
26 // Classifies the given [training] sample, writing to results.
27 // See ShapeClassifier for a full description.
28 int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
29 UNICHAR_ID keep_this,
30 std::vector<UnicharRating> *results) {
31 const int old_matcher_level = classify_->matcher_debug_level;
32 const int old_matcher_flags = classify_->matcher_debug_flags;
33 const int old_classify_level = classify_->classify_debug_level;
34 if (debug) {
35 // Explicitly set values of various control parameters to generate debug
36 // output if required, restoring the old values after classifying.
37 classify_->matcher_debug_level.set_value(2);
38 classify_->matcher_debug_flags.set_value(25);
39 classify_->classify_debug_level.set_value(3);
40 }
41 classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results);
42 if (debug) {
43 classify_->matcher_debug_level.set_value(old_matcher_level);
44 classify_->matcher_debug_flags.set_value(old_matcher_flags);
45 classify_->classify_debug_level.set_value(old_classify_level);
46 }
47 return results->size();
48 }
49
50 // Provides access to the ShapeTable that this classifier works with.
51 const ShapeTable *TessClassifier::GetShapeTable() const {
52 return classify_->shape_table();
53 }
54 // Provides access to the UNICHARSET that this classifier works with.
55 // Only needs to be overridden if GetShapeTable() can return nullptr.
56 const UNICHARSET &TessClassifier::GetUnicharset() const {
57 return classify_->unicharset;
58 }
59
60 // Displays classification as the given shape_id. Creates as many windows
61 // as it feels fit, using index as a guide for placement. Adds any created
62 // windows to the windows output and returns a new index that may be used
63 // by any subsequent classifiers. Caller waits for the user to view and
64 // then destroys the windows by clearing the vector.
65 int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id,
66 int index, std::vector<ScrollView *> &windows) {
67 int shape_id = unichar_id;
68 // TODO(rays) Fix this so it works with both flat and real shapetables.
69 // if (GetShapeTable() != nullptr)
70 // shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, nullptr);
71 if (shape_id < 0) {
72 return index;
73 }
74 if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) {
75 tprintf("No built-in templates for class/shape %d\n", shape_id);
76 return index;
77 }
78 #ifndef GRAPHICS_DISABLED
79 classify_->ShowBestMatchFor(shape_id, sample.features(), sample.num_features());
80 #endif
81 return index;
82 }
83
84 } // namespace tesseract