annotate mupdf-source/thirdparty/tesseract/src/training/common/errorcounter.cpp @ 40:aa33339d6b8a upstream

ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:31:38 +0200
parents b50eed0cc0ef
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
1 // Copyright 2011 Google Inc. All Rights Reserved.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
2 // Author: rays@google.com (Ray Smith)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
3 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
4 // Licensed under the Apache License, Version 2.0 (the "License");
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
5 // you may not use this file except in compliance with the License.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
6 // You may obtain a copy of the License at
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
7 // http://www.apache.org/licenses/LICENSE-2.0
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
8 // Unless required by applicable law or agreed to in writing, software
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
9 // distributed under the License is distributed on an "AS IS" BASIS,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
11 // See the License for the specific language governing permissions and
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
12 // limitations under the License.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
13 //
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
14 ///////////////////////////////////////////////////////////////////////
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
15
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
16 #ifdef HAVE_CONFIG_H
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
17 # include "config_auto.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
18 #endif
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
19
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
20 #include "errorcounter.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
21
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
22 #include "fontinfo.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
23 #include "sampleiterator.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
24 #include "shapeclassifier.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
25 #include "shapetable.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
26 #include "tesserrstream.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
27 #include "trainingsample.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
28 #include "trainingsampleset.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
29 #include "unicity_table.h"
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
30
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
31 #include <algorithm>
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
32 #include <ctime>
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
33
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
34 namespace tesseract {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
35
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
36 // Difference in result rating to be thought of as an "equal" choice.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
37 const double kRatingEpsilon = 1.0 / 32;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
38
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
39 // Tests a classifier, computing its error rate.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
40 // See errorcounter.h for description of arguments.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
41 // Iterates over the samples, calling the classifier in normal/silent mode.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
42 // If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
43 // report_level is set (4 or greater), it will then call the classifier again
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
44 // with a debug flag and a keep_this argument to find out what is going on.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
45 double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
46 CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
47 const std::vector<Image > &page_images, SampleIterator *it,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
48 double *unichar_error, double *scaled_error,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
49 std::string *fonts_report) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
50 const int fontsize = it->sample_set()->NumFonts();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
51 ErrorCounter counter(classifier->GetUnicharset(), fontsize);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
52 std::vector<UnicharRating> results;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
53
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
54 clock_t total_time = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
55 if (report_level > 1) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
56 total_time = clock();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
57 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
58 unsigned total_samples = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
59 double unscaled_error = 0.0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
60 // Set a number of samples on which to run the classify debug mode.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
61 int error_samples = report_level > 3 ? report_level * report_level : 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
62 // Iterate over all the samples, accumulating errors.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
63 for (it->Begin(); !it->AtEnd(); it->Next()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
64 TrainingSample *mutable_sample = it->MutableSample();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
65 int page_index = mutable_sample->page_num();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
66 Image page_pix =
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
67 0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
68 // No debug, no keep this.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
69 classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
70 bool debug_it = false;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
71 int correct_id = mutable_sample->class_id();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
72 if (counter.unicharset_.has_special_codes() &&
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
73 (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
74 correct_id == UNICHAR_BROKEN)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
75 // This is junk so use the special counter.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
76 debug_it = counter.AccumulateJunk(report_level > 3, results, mutable_sample);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
77 } else {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
78 debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, fontinfo_table, results,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
79 mutable_sample);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
80 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
81 if (debug_it && error_samples > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
82 // Running debug, keep the correct answer, and debug the classifier.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
83 tprintf("Error on sample %d: %s Classifier debug output:\n", it->GlobalSampleIndex(),
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
84 it->sample_set()->SampleToString(*mutable_sample).c_str());
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
85 #ifndef GRAPHICS_DISABLED
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
86 classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
87 #endif
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
88 --error_samples;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
89 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
90 ++total_samples;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
91 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
92 // Create the appropriate error report.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
93 unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
94 unichar_error, fonts_report);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
95 if (scaled_error != nullptr) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
96 *scaled_error = counter.scaled_error_;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
97 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
98 if (report_level > 1 && total_samples > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
99 // It is useful to know the time in microseconds/char.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
100 total_time = 1000 * (clock() - total_time) / CLOCKS_PER_SEC;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
101 tesserr << "Errors computed in " << total_time << " ms at "
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
102 << 1000 * total_time / total_samples << " μs/char\n";
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
103 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
104 return unscaled_error;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
105 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
106
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
107 // Tests a pair of classifiers, debugging errors of the new against the old.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
108 // See errorcounter.h for description of arguments.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
109 // Iterates over the samples, calling the classifiers in normal/silent mode.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
110 // If the new_classifier makes a boosting_mode error that the old_classifier
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
111 // does not, it will then call the new_classifier again with a debug flag
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
112 // and a keep_this argument to find out what is going on.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
113 void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
114 CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
115 const std::vector<Image > &page_images, SampleIterator *it) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
116 int fontsize = it->sample_set()->NumFonts();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
117 ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
118 ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
119 std::vector<UnicharRating> results;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
120
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
121 #if !defined(NDEBUG)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
122 int total_samples = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
123 #endif
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
124 int error_samples = 25;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
125 int total_new_errors = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
126 // Iterate over all the samples, accumulating errors.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
127 for (it->Begin(); !it->AtEnd(); it->Next()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
128 TrainingSample *mutable_sample = it->MutableSample();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
129 int page_index = mutable_sample->page_num();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
130 Image page_pix =
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
131 0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
132 // No debug, no keep this.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
133 old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
134 &results);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
135 int correct_id = mutable_sample->class_id();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
136 if (correct_id != 0 && !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
137 results, mutable_sample)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
138 // old classifier was correct, check the new one.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
139 new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
140 &results);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
141 if (correct_id != 0 && new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
142 results, mutable_sample)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
143 tprintf("New Error on sample %d: Classifier debug output:\n", it->GlobalSampleIndex());
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
144 ++total_new_errors;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
145 new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1, correct_id, &results);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
146 if (results.size() > 0 && error_samples > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
147 #ifndef GRAPHICS_DISABLED
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
148 new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
149 #endif
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
150 --error_samples;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
151 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
152 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
153 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
154 #if !defined(NDEBUG)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
155 ++total_samples;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
156 #endif
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
157 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
158 tprintf("Total new errors = %d\n", total_new_errors);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
159 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
160
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
161 // Constructor is private. Only anticipated use of ErrorCounter is via
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
162 // the static ComputeErrorRate.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
163 ErrorCounter::ErrorCounter(const UNICHARSET &unicharset, int fontsize)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
164 : scaled_error_(0.0)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
165 , rating_epsilon_(kRatingEpsilon)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
166 , unichar_counts_(unicharset.size(), unicharset.size(), 0)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
167 , ok_score_hist_(0, 101)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
168 , bad_score_hist_(0, 101)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
169 , unicharset_(unicharset) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
170 Counts empty_counts;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
171 font_counts_.clear();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
172 font_counts_.resize(fontsize, empty_counts);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
173 multi_unichar_counts_.clear();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
174 multi_unichar_counts_.resize(unicharset.size(), 0);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
175 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
176
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
177 // Accumulates the errors from the classifier results on a single sample.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
178 // Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
179 // boosting_mode selects the type of error to be used for boosting and the
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
180 // is_error_ member of sample is set according to whether the required type
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
181 // of error occurred. The font_table provides access to font properties
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
182 // for error counting and shape_table is used to understand the relationship
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
183 // between unichar_ids and shape_ids in the results
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
184 bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
185 const FontInfoTable &font_table,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
186 const std::vector<UnicharRating> &results,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
187 TrainingSample *sample) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
188 int num_results = results.size();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
189 int answer_actual_rank = -1;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
190 int font_id = sample->font_id();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
191 int unichar_id = sample->class_id();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
192 sample->set_is_error(false);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
193 if (num_results == 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
194 // Reject. We count rejects as a separate category, but still mark the
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
195 // sample as an error in case any training module wants to use that to
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
196 // improve the classifier.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
197 sample->set_is_error(true);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
198 ++font_counts_[font_id].n[CT_REJECT];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
199 } else {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
200 // Find rank of correct unichar answer, using rating_epsilon_ to allow
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
201 // different answers to score as equal. (Ignoring the font.)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
202 int epsilon_rank = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
203 int answer_epsilon_rank = -1;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
204 int num_top_answers = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
205 double prev_rating = results[0].rating;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
206 bool joined = false;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
207 bool broken = false;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
208 int res_index = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
209 while (res_index < num_results) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
210 if (results[res_index].rating < prev_rating - rating_epsilon_) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
211 ++epsilon_rank;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
212 prev_rating = results[res_index].rating;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
213 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
214 if (results[res_index].unichar_id == unichar_id && answer_epsilon_rank < 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
215 answer_epsilon_rank = epsilon_rank;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
216 answer_actual_rank = res_index;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
217 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
218 if (results[res_index].unichar_id == UNICHAR_JOINED && unicharset_.has_special_codes()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
219 joined = true;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
220 } else if (results[res_index].unichar_id == UNICHAR_BROKEN &&
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
221 unicharset_.has_special_codes()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
222 broken = true;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
223 } else if (epsilon_rank == 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
224 ++num_top_answers;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
225 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
226 ++res_index;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
227 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
228 if (answer_actual_rank != 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
229 // Correct result is not absolute top.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
230 ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
231 if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
232 sample->set_is_error(true);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
233 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
234 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
235 if (answer_epsilon_rank == 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
236 ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
237 // Unichar OK, but count if multiple unichars.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
238 if (num_top_answers > 1) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
239 ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
240 ++multi_unichar_counts_[unichar_id];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
241 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
242 // Check to see if any font in the top choice has attributes that match.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
243 // TODO(rays) It is easy to add counters for individual font attributes
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
244 // here if we want them.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
245 if (font_table.SetContainsFontProperties(font_id, results[answer_actual_rank].fonts)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
246 // Font attributes were matched.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
247 // Check for multiple properties.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
248 if (font_table.SetContainsMultipleFontProperties(results[answer_actual_rank].fonts)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
249 ++font_counts_[font_id].n[CT_OK_MULTI_FONT];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
250 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
251 } else {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
252 // Font attributes weren't matched.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
253 ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
254 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
255 } else {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
256 // This is a top unichar error.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
257 ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
258 if (boosting_mode == CT_UNICHAR_TOP1_ERR) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
259 sample->set_is_error(true);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
260 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
261 // Count maps from unichar id to wrong unichar id.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
262 ++unichar_counts_(unichar_id, results[0].unichar_id);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
263 if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
264 // It is also a 2nd choice unichar error.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
265 ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
266 if (boosting_mode == CT_UNICHAR_TOP2_ERR) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
267 sample->set_is_error(true);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
268 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
269 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
270 if (answer_epsilon_rank < 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
271 // It is also a top-n choice unichar error.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
272 ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
273 if (boosting_mode == CT_UNICHAR_TOPN_ERR) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
274 sample->set_is_error(true);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
275 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
276 answer_epsilon_rank = epsilon_rank;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
277 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
278 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
279 // Compute mean number of return values and mean rank of correct answer.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
280 font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
281 font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
282 if (joined) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
283 ++font_counts_[font_id].n[CT_OK_JOINED];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
284 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
285 if (broken) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
286 ++font_counts_[font_id].n[CT_OK_BROKEN];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
287 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
288 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
289 // If it was an error for boosting then sum the weight.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
290 if (sample->is_error()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
291 scaled_error_ += sample->weight();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
292 if (debug) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
293 tprintf("%d results for char %s font %d :", num_results,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
294 unicharset_.id_to_unichar(unichar_id), font_id);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
295 for (int i = 0; i < num_results; ++i) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
296 tprintf(" %.3f : %s\n", results[i].rating,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
297 unicharset_.id_to_unichar(results[i].unichar_id));
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
298 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
299 return true;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
300 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
301 int percent = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
302 if (num_results > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
303 percent = IntCastRounded(results[0].rating * 100);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
304 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
305 bad_score_hist_.add(percent, 1);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
306 } else {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
307 int percent = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
308 if (answer_actual_rank >= 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
309 percent = IntCastRounded(results[answer_actual_rank].rating * 100);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
310 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
311 ok_score_hist_.add(percent, 1);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
312 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
313 return false;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
314 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
315
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
316 // Accumulates counts for junk. Counts only whether the junk was correctly
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
317 // rejected or not.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
318 bool ErrorCounter::AccumulateJunk(bool debug, const std::vector<UnicharRating> &results,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
319 TrainingSample *sample) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
320 // For junk we accept no answer, or an explicit shape answer matching the
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
321 // class id of the sample.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
322 const int num_results = results.size();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
323 const int font_id = sample->font_id();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
324 const int unichar_id = sample->class_id();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
325 int percent = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
326 if (num_results > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
327 percent = IntCastRounded(results[0].rating * 100);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
328 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
329 if (num_results > 0 && results[0].unichar_id != unichar_id) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
330 // This is a junk error.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
331 ++font_counts_[font_id].n[CT_ACCEPTED_JUNK];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
332 sample->set_is_error(true);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
333 // It counts as an error for boosting too so sum the weight.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
334 scaled_error_ += sample->weight();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
335 bad_score_hist_.add(percent, 1);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
336 return debug;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
337 } else {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
338 // Correctly rejected.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
339 ++font_counts_[font_id].n[CT_REJECTED_JUNK];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
340 sample->set_is_error(false);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
341 ok_score_hist_.add(percent, 1);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
342 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
343 return false;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
344 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
345
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
346 // Creates a report of the error rate. The report_level controls the detail
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
347 // that is reported to stderr via tprintf:
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
348 // 0 -> no output.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
349 // >=1 -> bottom-line error rate.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
350 // >=3 -> font-level error rate.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
351 // boosting_mode determines the return value. It selects which (un-weighted)
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
352 // error rate to return.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
353 // The fontinfo_table from MasterTrainer provides the names of fonts.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
354 // The it determines the current subset of the training samples.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
355 // If not nullptr, the top-choice unichar error rate is saved in unichar_error.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
356 // If not nullptr, the report string is saved in fonts_report.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
357 // (Ignoring report_level).
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
358 double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
359 const FontInfoTable &fontinfo_table, const SampleIterator &it,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
360 double *unichar_error, std::string *fonts_report) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
361 // Compute totals over all the fonts and report individual font results
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
362 // when required.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
363 Counts totals;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
364 int fontsize = font_counts_.size();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
365 for (int f = 0; f < fontsize; ++f) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
366 // Accumulate counts over fonts.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
367 totals += font_counts_[f];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
368 std::string font_report;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
369 if (ReportString(false, font_counts_[f], font_report)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
370 if (fonts_report != nullptr) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
371 *fonts_report += fontinfo_table.at(f).name;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
372 *fonts_report += ": ";
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
373 *fonts_report += font_report;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
374 *fonts_report += "\n";
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
375 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
376 if (report_level > 2) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
377 // Report individual font error rates.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
378 tprintf("%s: %s\n", fontinfo_table.at(f).name, font_report.c_str());
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
379 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
380 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
381 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
382 // Report the totals.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
383 std::string total_report;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
384 bool any_results = ReportString(true, totals, total_report);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
385 if (fonts_report != nullptr && fonts_report->empty()) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
386 // Make sure we return something even if there were no samples.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
387 *fonts_report = "NoSamplesFound: ";
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
388 *fonts_report += total_report;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
389 *fonts_report += "\n";
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
390 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
391 if (report_level > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
392 // Report the totals.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
393 std::string total_report;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
394 if (any_results) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
395 tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, total_report.c_str());
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
396 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
397 // Report the worst substitution error only for now.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
398 if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
399 int charsetsize = unicharset_.size();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
400 int worst_uni_id = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
401 int worst_result_id = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
402 int worst_err = 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
403 for (int u = 0; u < charsetsize; ++u) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
404 for (int v = 0; v < charsetsize; ++v) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
405 if (unichar_counts_(u, v) > worst_err) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
406 worst_err = unichar_counts_(u, v);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
407 worst_uni_id = u;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
408 worst_result_id = v;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
409 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
410 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
411 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
412 if (worst_err > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
413 tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", worst_uni_id,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
414 unicharset_.id_to_unichar(worst_uni_id), unicharset_.id_to_unichar(worst_result_id),
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
415 worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
416 100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
417 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
418 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
419 tprintf("Multi-unichar shape use:\n");
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
420 for (int u = 0; u < multi_unichar_counts_.size(); ++u) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
421 if (multi_unichar_counts_[u] > 0) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
422 tprintf("%d multiple answers for unichar: %s\n", multi_unichar_counts_[u],
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
423 unicharset_.id_to_unichar(u));
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
424 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
425 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
426 tprintf("OK Score histogram:\n");
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
427 ok_score_hist_.print();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
428 tprintf("ERROR Score histogram:\n");
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
429 bad_score_hist_.print();
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
430 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
431
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
432 double rates[CT_SIZE];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
433 if (!ComputeRates(totals, rates)) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
434 return 0.0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
435 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
436 // Set output values if asked for.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
437 if (unichar_error != nullptr) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
438 *unichar_error = rates[CT_UNICHAR_TOP1_ERR];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
439 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
440 return rates[boosting_mode];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
441 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
442
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
443 // Sets the report string to a combined human and machine-readable report
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
444 // string of the error rates.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
445 // Returns false if there is no data, leaving report unchanged, unless
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
446 // even_if_empty is true.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
447 bool ErrorCounter::ReportString(bool even_if_empty, const Counts &counts, std::string &report) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
448 // Compute the error rates.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
449 double rates[CT_SIZE];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
450 if (!ComputeRates(counts, rates) && !even_if_empty) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
451 return false;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
452 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
453 // Using %.4g%%, the length of the output string should exactly match the
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
454 // length of the format string, but in case of overflow, allow for +eddd
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
455 // on each number.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
456 const int kMaxExtraLength = 5; // Length of +eddd.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
457 // Keep this format string and the snprintf in sync with the CountTypes enum.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
458 const char format_str[] =
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
459 "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] "
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
460 "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, "
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
461 "FontAttr=%.4g%%, Multi=%.4g%%, "
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
462 "Answers=%.3g, Rank=%.3g, "
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
463 "OKjunk=%.4g%%, Badjunk=%.4g%%";
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
464 constexpr size_t max_str_len = sizeof(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
465 char formatted_str[max_str_len];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
466 snprintf(formatted_str, max_str_len, format_str, rates[CT_UNICHAR_TOP1_ERR] * 100.0,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
467 rates[CT_UNICHAR_TOP2_ERR] * 100.0, rates[CT_UNICHAR_TOPN_ERR] * 100.0,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
468 rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, rates[CT_OK_MULTI_UNICHAR] * 100.0,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
469 rates[CT_OK_JOINED] * 100.0, rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0,
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
470 rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_OK_MULTI_FONT] * 100.0, rates[CT_NUM_RESULTS],
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
471 rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], 100.0 * rates[CT_ACCEPTED_JUNK]);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
472 report = formatted_str;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
473 // Now append each field of counts with a tab in front so the result can
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
474 // be loaded into a spreadsheet.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
475 for (int ct : counts.n) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
476 report += "\t" + std::to_string(ct);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
477 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
478 return true;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
479 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
480
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
481 // Computes the error rates and returns in rates which is an array of size
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
482 // CT_SIZE. Returns false if there is no data, leaving rates unchanged.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
483 bool ErrorCounter::ComputeRates(const Counts &counts, double rates[CT_SIZE]) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
484 const int ok_samples =
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
485 counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] + counts.n[CT_REJECT];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
486 const int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
487 // Compute rates for normal chars.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
488 double denominator = static_cast<double>(std::max(ok_samples, 1));
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
489 for (int ct = 0; ct <= CT_RANK; ++ct) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
490 rates[ct] = counts.n[ct] / denominator;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
491 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
492 // Compute rates for junk.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
493 denominator = static_cast<double>(std::max(junk_samples, 1));
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
494 for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
495 rates[ct] = counts.n[ct] / denominator;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
496 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
497 return ok_samples != 0 || junk_samples != 0;
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
498 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
499
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
500 ErrorCounter::Counts::Counts() {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
501 memset(n, 0, sizeof(n[0]) * CT_SIZE);
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
502 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
503 // Adds other into this for computing totals.
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
504 void ErrorCounter::Counts::operator+=(const Counts &other) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
505 for (int ct = 0; ct < CT_SIZE; ++ct) {
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
506 n[ct] += other.n[ct];
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
507 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
508 }
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
509
b50eed0cc0ef ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
510 } // namespace tesseract.