Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/common/intfeaturemap.h @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | b50eed0cc0ef |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 // Copyright 2010 Google Inc. All Rights Reserved. | |
| 2 // Author: rays@google.com (Ray Smith) | |
| 3 /////////////////////////////////////////////////////////////////////// | |
| 4 // File: intfeaturemap.h | |
| 5 // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi | |
| 6 // to provide a subspace mapping and fast feature lookup. | |
| 7 // | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 #ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_ | |
| 21 #define TESSERACT_CLASSIFY_INTFEATUREMAP_H_ | |
| 22 | |
| 23 #include "export.h" | |
| 24 #include "indexmapbidi.h" | |
| 25 #include "intfeaturespace.h" | |
| 26 #include "intproto.h" | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 class SampleIterator; | |
| 31 | |
| 32 // Number of positive and negative offset maps. | |
| 33 static const int kNumOffsetMaps = 2; | |
| 34 | |
| 35 // Class to map a feature space defined by INT_FEATURE_STRUCT to a compact | |
| 36 // down-sampled subspace of actually used features. | |
| 37 // The IntFeatureMap copes with 2 stages of transformation: | |
| 38 // The first step is down-sampling (re-quantization) and converting to a | |
| 39 // single index value from the 3-D input: | |
| 40 // INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and | |
| 41 // the second is a feature-space compaction to map only the feature indices | |
| 42 // that are actually used. This saves space in classifiers that are built | |
| 43 // using the mapped feature space. | |
| 44 // index (sparse) feature <-> map (compact) feature via IndexMapBiDi. | |
| 45 // Although the transformations are reversible, the inverses are lossy and do | |
| 46 // not return the exact input INT_FEATURE_STRUCT, due to the many->one nature | |
| 47 // of both transformations. | |
| 48 class TESS_COMMON_TRAINING_API IntFeatureMap { | |
| 49 public: | |
| 50 IntFeatureMap(); | |
| 51 ~IntFeatureMap(); | |
| 52 | |
| 53 // Accessors. | |
| 54 int sparse_size() const { | |
| 55 return feature_space_.Size(); | |
| 56 } | |
| 57 int compact_size() const { | |
| 58 return compact_size_; | |
| 59 } | |
| 60 const IntFeatureSpace &feature_space() const { | |
| 61 return feature_space_; | |
| 62 } | |
| 63 const IndexMapBiDi &feature_map() const { | |
| 64 return feature_map_; | |
| 65 } | |
| 66 | |
| 67 // Pseudo-accessors. | |
| 68 int IndexFeature(const INT_FEATURE_STRUCT &f) const; | |
| 69 int MapFeature(const INT_FEATURE_STRUCT &f) const; | |
| 70 int MapIndexFeature(int index_feature) const; | |
| 71 INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const; | |
| 72 INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const; | |
| 73 void DeleteMapFeature(int map_feature); | |
| 74 bool IsMapFeatureDeleted(int map_feature) const; | |
| 75 | |
| 76 // Copies the given feature_space and uses it as the index feature map | |
| 77 // from INT_FEATURE_STRUCT. | |
| 78 void Init(const IntFeatureSpace &feature_space); | |
| 79 | |
| 80 // Helper to return an offset index feature. In this context an offset | |
| 81 // feature with a dir of +/-1 is a feature of a similar direction, | |
| 82 // but shifted perpendicular to the direction of the feature. An offset | |
| 83 // feature with a dir of +/-2 is feature at the same position, but rotated | |
| 84 // by +/- one [compact] quantum. Returns the index of the generated offset | |
| 85 // feature, or -1 if it doesn't exist. Dir should be in | |
| 86 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. | |
| 87 // A dir of 0 is an identity transformation. | |
| 88 // Both input and output are from the index(sparse) feature space, not | |
| 89 // the mapped/compact feature space, but the offset feature is the minimum | |
| 90 // distance moved from the input to guarantee that it maps to the next | |
| 91 // available quantum in the mapped/compact space. | |
| 92 int OffsetFeature(int index_feature, int dir) const; | |
| 93 | |
| 94 // Computes the features used by the subset of samples defined by | |
| 95 // the iterator and sets up the feature mapping. | |
| 96 // Returns the size of the compacted feature space. | |
| 97 int FindNZFeatureMapping(SampleIterator *it); | |
| 98 | |
| 99 // After deleting some features, finish setting up the mapping, and map | |
| 100 // all the samples. Returns the size of the compacted feature space. | |
| 101 int FinalizeMapping(SampleIterator *it); | |
| 102 | |
| 103 // Indexes the given array of features to a vector of sorted indices. | |
| 104 void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, | |
| 105 std::vector<int> *sorted_features) const { | |
| 106 feature_space_.IndexAndSortFeatures(features, num_features, sorted_features); | |
| 107 } | |
| 108 // Maps the given array of index/sparse features to an array of map/compact | |
| 109 // features. | |
| 110 // Assumes the input is sorted. The output indices are sorted and uniqued. | |
| 111 // Returns the number of "missed" features, being features that | |
| 112 // don't map to the compact feature space. | |
| 113 int MapIndexedFeatures(const std::vector<int> &index_features, | |
| 114 std::vector<int> *map_features) const { | |
| 115 return feature_map_.MapFeatures(index_features, map_features); | |
| 116 } | |
| 117 | |
| 118 // Prints the map features from the set in human-readable form. | |
| 119 void DebugMapFeatures(const std::vector<int> &map_features) const; | |
| 120 | |
| 121 private: | |
| 122 void Clear(); | |
| 123 | |
| 124 // Helper to compute an offset index feature. In this context an offset | |
| 125 // feature with a dir of +/-1 is a feature of a similar direction, | |
| 126 // but shifted perpendicular to the direction of the feature. An offset | |
| 127 // feature with a dir of +/-2 is feature at the same position, but rotated | |
| 128 // by +/- one [compact] quantum. Returns the index of the generated offset | |
| 129 // feature, or -1 if it doesn't exist. Dir should be in | |
| 130 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. | |
| 131 // A dir of 0 is an identity transformation. | |
| 132 // Both input and output are from the index(sparse) feature space, not | |
| 133 // the mapped/compact feature space, but the offset feature is the minimum | |
| 134 // distance moved from the input to guarantee that it maps to the next | |
| 135 // available quantum in the mapped/compact space. | |
| 136 int ComputeOffsetFeature(int index_feature, int dir) const; | |
| 137 | |
| 138 // True if the mapping has changed since it was last finalized. | |
| 139 bool mapping_changed_; | |
| 140 // Size of the compacted feature space, after unused features are removed. | |
| 141 int compact_size_; | |
| 142 // Feature space quantization definition and indexing from INT_FEATURE_STRUCT. | |
| 143 IntFeatureSpace feature_space_; | |
| 144 // Mapping from indexed feature space to the compacted space with unused | |
| 145 // features mapping to -1. | |
| 146 IndexMapBiDi feature_map_; | |
| 147 // Index tables to map a feature index to the corresponding feature after a | |
| 148 // shift perpendicular to the feature direction, or a rotation in place. | |
| 149 // An entry of -1 indicates that there is no corresponding feature. | |
| 150 // Array of arrays of size feature_space_.Size() owned by this class. | |
| 151 int *offset_plus_[kNumOffsetMaps]; | |
| 152 int *offset_minus_[kNumOffsetMaps]; | |
| 153 | |
| 154 // Don't use default copy and assign! | |
| 155 IntFeatureMap(const IntFeatureMap &); | |
| 156 void operator=(const IntFeatureMap &); | |
| 157 }; | |
| 158 | |
| 159 } // namespace tesseract. | |
| 160 | |
| 161 #endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H_ |
