Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/training/common/intfeaturemap.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/training/common/intfeaturemap.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,161 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: intfeaturemap.h +// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi +// to provide a subspace mapping and fast feature lookup. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_ +#define TESSERACT_CLASSIFY_INTFEATUREMAP_H_ + +#include "export.h" +#include "indexmapbidi.h" +#include "intfeaturespace.h" +#include "intproto.h" + +namespace tesseract { + +class SampleIterator; + +// Number of positive and negative offset maps. +static const int kNumOffsetMaps = 2; + +// Class to map a feature space defined by INT_FEATURE_STRUCT to a compact +// down-sampled subspace of actually used features. +// The IntFeatureMap copes with 2 stages of transformation: +// The first step is down-sampling (re-quantization) and converting to a +// single index value from the 3-D input: +// INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and +// the second is a feature-space compaction to map only the feature indices +// that are actually used. 
This saves space in classifiers that are built +// using the mapped feature space. +// index (sparse) feature <-> map (compact) feature via IndexMapBiDi. +// Although the transformations are reversible, the inverses are lossy and do +// not return the exact input INT_FEATURE_STRUCT, due to the many->one nature +// of both transformations. +class TESS_COMMON_TRAINING_API IntFeatureMap { +public: + IntFeatureMap(); + ~IntFeatureMap(); + + // Accessors. + int sparse_size() const { + return feature_space_.Size(); + } + int compact_size() const { + return compact_size_; + } + const IntFeatureSpace &feature_space() const { + return feature_space_; + } + const IndexMapBiDi &feature_map() const { + return feature_map_; + } + + // Pseudo-accessors. + int IndexFeature(const INT_FEATURE_STRUCT &f) const; + int MapFeature(const INT_FEATURE_STRUCT &f) const; + int MapIndexFeature(int index_feature) const; + INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const; + INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const; + void DeleteMapFeature(int map_feature); + bool IsMapFeatureDeleted(int map_feature) const; + + // Copies the given feature_space and uses it as the index feature map + // from INT_FEATURE_STRUCT. + void Init(const IntFeatureSpace &feature_space); + + // Helper to return an offset index feature. In this context an offset + // feature with a dir of +/-1 is a feature of a similar direction, + // but shifted perpendicular to the direction of the feature. An offset + // feature with a dir of +/-2 is feature at the same position, but rotated + // by +/- one [compact] quantum. Returns the index of the generated offset + // feature, or -1 if it doesn't exist. Dir should be in + // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. + // A dir of 0 is an identity transformation. 
+ // Both input and output are from the index(sparse) feature space, not + // the mapped/compact feature space, but the offset feature is the minimum + // distance moved from the input to guarantee that it maps to the next + // available quantum in the mapped/compact space. + int OffsetFeature(int index_feature, int dir) const; + + // Computes the features used by the subset of samples defined by + // the iterator and sets up the feature mapping. + // Returns the size of the compacted feature space. + int FindNZFeatureMapping(SampleIterator *it); + + // After deleting some features, finish setting up the mapping, and map + // all the samples. Returns the size of the compacted feature space. + int FinalizeMapping(SampleIterator *it); + + // Indexes the given array of features to a vector of sorted indices. + void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, + std::vector<int> *sorted_features) const { + feature_space_.IndexAndSortFeatures(features, num_features, sorted_features); + } + // Maps the given array of index/sparse features to an array of map/compact + // features. + // Assumes the input is sorted. The output indices are sorted and uniqued. + // Returns the number of "missed" features, being features that + // don't map to the compact feature space. + int MapIndexedFeatures(const std::vector<int> &index_features, + std::vector<int> *map_features) const { + return feature_map_.MapFeatures(index_features, map_features); + } + + // Prints the map features from the set in human-readable form. + void DebugMapFeatures(const std::vector<int> &map_features) const; + +private: + void Clear(); + + // Helper to compute an offset index feature. In this context an offset + // feature with a dir of +/-1 is a feature of a similar direction, + // but shifted perpendicular to the direction of the feature. An offset + // feature with a dir of +/-2 is feature at the same position, but rotated + // by +/- one [compact] quantum. 
Returns the index of the generated offset + // feature, or -1 if it doesn't exist. Dir should be in + // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. + // A dir of 0 is an identity transformation. + // Both input and output are from the index(sparse) feature space, not + // the mapped/compact feature space, but the offset feature is the minimum + // distance moved from the input to guarantee that it maps to the next + // available quantum in the mapped/compact space. + int ComputeOffsetFeature(int index_feature, int dir) const; + + // True if the mapping has changed since it was last finalized. + bool mapping_changed_; + // Size of the compacted feature space, after unused features are removed. + int compact_size_; + // Feature space quantization definition and indexing from INT_FEATURE_STRUCT. + IntFeatureSpace feature_space_; + // Mapping from indexed feature space to the compacted space with unused + // features mapping to -1. + IndexMapBiDi feature_map_; + // Index tables to map a feature index to the corresponding feature after a + // shift perpendicular to the feature direction, or a rotation in place. + // An entry of -1 indicates that there is no corresponding feature. + // Array of arrays of size feature_space_.Size() owned by this class. + int *offset_plus_[kNumOffsetMaps]; + int *offset_minus_[kNumOffsetMaps]; + + // Don't use default copy and assign! + IntFeatureMap(const IntFeatureMap &); + void operator=(const IntFeatureMap &); +}; + +} // namespace tesseract. + +#endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H_
