comparison mupdf-source/thirdparty/tesseract/src/training/common/intfeaturedist.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 ///////////////////////////////////////////////////////////////////////
4 // File: intfeaturedist.cpp
5 // Description: Fast set-difference-based feature distance calculator.
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #include "intfeaturedist.h"
20 #include "intfeaturemap.h"
21
22 namespace tesseract {
23
24 IntFeatureDist::IntFeatureDist()
25 : size_(0)
26 , total_feature_weight_(0.0)
27 , feature_map_(nullptr)
28 , features_(nullptr)
29 , features_delta_one_(nullptr)
30 , features_delta_two_(nullptr) {}
31
32 IntFeatureDist::~IntFeatureDist() {
33 Clear();
34 }
35
36 // Initialize the table to the given size of feature space.
37 void IntFeatureDist::Init(const IntFeatureMap *feature_map) {
38 size_ = feature_map->sparse_size();
39 Clear();
40 feature_map_ = feature_map;
41 features_ = new bool[size_];
42 features_delta_one_ = new bool[size_];
43 features_delta_two_ = new bool[size_];
44 memset(features_, false, size_ * sizeof(features_[0]));
45 memset(features_delta_one_, false, size_ * sizeof(features_delta_one_[0]));
46 memset(features_delta_two_, false, size_ * sizeof(features_delta_two_[0]));
47 total_feature_weight_ = 0.0;
48 }
49
50 // Setup the map for the given indexed_features that have been indexed by
51 // feature_map.
52 void IntFeatureDist::Set(const std::vector<int> &indexed_features, int canonical_count,
53 bool value) {
54 total_feature_weight_ = canonical_count;
55 for (int f : indexed_features) {
56 features_[f] = value;
57 for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) {
58 if (dir == 0) {
59 continue;
60 }
61 const int mapped_f = feature_map_->OffsetFeature(f, dir);
62 if (mapped_f >= 0) {
63 features_delta_one_[mapped_f] = value;
64 for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) {
65 if (dir2 == 0) {
66 continue;
67 }
68 const int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2);
69 if (mapped_f2 >= 0) {
70 features_delta_two_[mapped_f2] = value;
71 }
72 }
73 }
74 }
75 }
76 }
77
78 // Compute the distance between the given feature vector and the last
79 // Set feature vector.
80 double IntFeatureDist::FeatureDistance(const std::vector<int> &features) const {
81 const int num_test_features = features.size();
82 const double denominator = total_feature_weight_ + num_test_features;
83 double misses = denominator;
84 for (int i = 0; i < num_test_features; ++i) {
85 const int index = features[i];
86 const double weight = 1.0;
87 if (features_[index]) {
88 // A perfect match.
89 misses -= 2.0 * weight;
90 } else if (features_delta_one_[index]) {
91 misses -= 1.5 * weight;
92 } else if (features_delta_two_[index]) {
93 // A near miss.
94 misses -= 1.0 * weight;
95 }
96 }
97 return misses / denominator;
98 }
99
100 // Compute the distance between the given feature vector and the last
101 // Set feature vector.
102 double IntFeatureDist::DebugFeatureDistance(const std::vector<int> &features) const {
103 const int num_test_features = features.size();
104 const double denominator = total_feature_weight_ + num_test_features;
105 double misses = denominator;
106 for (int i = 0; i < num_test_features; ++i) {
107 const int index = features[i];
108 const double weight = 1.0;
109 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(features[i]);
110 tprintf("Testing feature weight %g:", weight);
111 f.print();
112 if (features_[index]) {
113 // A perfect match.
114 misses -= 2.0 * weight;
115 tprintf("Perfect hit\n");
116 } else if (features_delta_one_[index]) {
117 misses -= 1.5 * weight;
118 tprintf("-1 hit\n");
119 } else if (features_delta_two_[index]) {
120 // A near miss.
121 misses -= 1.0 * weight;
122 tprintf("-2 hit\n");
123 } else {
124 tprintf("Total miss\n");
125 }
126 }
127 tprintf("Features present:");
128 for (int i = 0; i < size_; ++i) {
129 if (features_[i]) {
130 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
131 f.print();
132 }
133 }
134 tprintf("\nMinus one features:");
135 for (int i = 0; i < size_; ++i) {
136 if (features_delta_one_[i]) {
137 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
138 f.print();
139 }
140 }
141 tprintf("\nMinus two features:");
142 for (int i = 0; i < size_; ++i) {
143 if (features_delta_two_[i]) {
144 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
145 f.print();
146 }
147 }
148 tprintf("\n");
149 return misses / denominator;
150 }
151
152 // Clear all data.
153 void IntFeatureDist::Clear() {
154 delete[] features_;
155 features_ = nullptr;
156 delete[] features_delta_one_;
157 features_delta_one_ = nullptr;
158 delete[] features_delta_two_;
159 features_delta_two_ = nullptr;
160 }
161
162 } // namespace tesseract