Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/cluster.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: cluster.h | |
| 3 ** Purpose: Definition of feature space clustering routines | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 *****************************************************************************/ | |
| 17 | |
| 18 #ifndef CLUSTER_H | |
| 19 #define CLUSTER_H | |
| 20 | |
| 21 #include "kdtree.h" | |
| 22 #include "oldlist.h" | |
| 23 | |
| 24 namespace tesseract { | |
| 25 | |
// Histogram bucket type. Only declared here; this header just stores
// pointers to it (see CLUSTERER::bucket_cache).
struct BUCKETS;

// Lower and upper bound used to size CLUSTERER::bucket_cache, which has
// MAXBUCKETS + 1 - MINBUCKETS entries per distribution type.
#define MINBUCKETS 5
#define MAXBUCKETS 39
| 30 | |
| 31 /*---------------------------------------------------------------------- | |
| 32 Types | |
| 33 ----------------------------------------------------------------------*/ | |
// Node of the binary cluster tree. A node owns the sub-clusters reachable
// through Left and Right: destroying a node destroys everything below it.
struct CLUSTER {
  // Creates a node without sub-clusters whose Mean holds n floats, all 0.
  // Every member gets a defined value here: the destructor deletes Left and
  // Right unconditionally, so leaving them indeterminate would make the
  // destruction of a freshly built node undefined behaviour. Bit-fields are
  // set in the init list because default member initializers for bit-fields
  // need C++20.
  CLUSTER(size_t n)
      : Clustered(false),
        Prototype(false),
        SampleCount(0),
        Left(nullptr),
        Right(nullptr),
        CharID(0),
        Mean(n) {
  }

  // Releases both sub-trees; deleting a null link is a no-op.
  ~CLUSTER() {
    delete Left;
    delete Right;
  }

  // NOTE(review): the implicit copy operations are still available and a copy
  // would share Left/Right with its source (double delete). Confirm that no
  // caller copies nodes before deleting those operations.

  bool Clustered : 1;        // true if included in a higher cluster
  bool Prototype : 1;        // true if cluster represented by a proto
  unsigned SampleCount : 30; // number of samples in this cluster
  CLUSTER *Left;             // ptr to left sub-cluster (owned, may be null)
  CLUSTER *Right;            // ptr to right sub-cluster (owned, may be null)
  int32_t CharID;            // identifier of char sample came from
  std::vector<float> Mean;   // mean of cluster - SampleSize floats
};
| 51 using SAMPLE = CLUSTER; // can refer to as either sample or cluster | |
| 52 | |
// Selects which types of prototypes are to be made (see
// CLUSTERCONFIG::ProtoStyle).
enum PROTOSTYLE {
  spherical,
  elliptical,
  mixed,
  automatic
};
| 54 | |
| 55 struct CLUSTERCONFIG { // parameters to control clustering | |
| 56 PROTOSTYLE ProtoStyle; // specifies types of protos to be made | |
| 57 float MinSamples; // min # of samples per proto - % of total | |
| 58 float MaxIllegal; // max percentage of samples in a cluster which | |
| 59 // have more than 1 feature in that cluster | |
| 60 float Independence; // desired independence between dimensions | |
| 61 double Confidence; // desired confidence in prototypes created | |
| 62 int MagicSamples; // Ideal number of samples in a cluster. | |
| 63 }; | |
| 64 | |
// Distribution kinds. DISTRIBUTION_COUNT is not a distribution itself: it is
// the number of preceding enumerators and is used as an array dimension by
// CLUSTERER::bucket_cache.
enum DISTRIBUTION {
  normal,
  uniform,
  D_random,
  DISTRIBUTION_COUNT
};
| 66 | |
// Storage shared by a single float and a pointer to floats; only one of the
// two members is meaningful at a time.
// NOTE(review): presumably PROTOTYPE::Style tells which member is active and
// Elliptical points at one value per dimension - confirm in cluster.cpp.
// Ownership of the pointed-to floats is not visible from this header.
union FLOATUNION {
  float Spherical;   // single value
  float *Elliptical; // pointer to an array of values
};
| 71 | |
| 72 struct PROTOTYPE { | |
| 73 bool Significant : 1; // true if prototype is significant | |
| 74 bool Merged : 1; // Merged after clustering so do not output | |
| 75 // but kept for display purposes. If it has no | |
| 76 // samples then it was actually merged. | |
| 77 // Otherwise it matched an already significant | |
| 78 // cluster. | |
| 79 unsigned Style : 2; // spherical, elliptical, or mixed | |
| 80 unsigned NumSamples : 28; // number of samples in the cluster | |
| 81 CLUSTER *Cluster; // ptr to cluster which made prototype | |
| 82 std::vector<DISTRIBUTION> Distrib; // different distribution for each dimension | |
| 83 std::vector<float> Mean; // prototype mean | |
| 84 float TotalMagnitude; // total magnitude over all dimensions | |
| 85 float LogMagnitude; // log base e of TotalMagnitude | |
| 86 FLOATUNION Variance; // prototype variance | |
| 87 FLOATUNION Magnitude; // magnitude of density function | |
| 88 FLOATUNION Weight; // weight of density function | |
| 89 }; | |
| 90 | |
| 91 struct CLUSTERER { | |
| 92 int16_t SampleSize; // number of parameters per sample | |
| 93 PARAM_DESC *ParamDesc; // description of each parameter | |
| 94 int32_t NumberOfSamples; // total number of samples being clustered | |
| 95 KDTREE *KDTree; // for optimal nearest neighbor searching | |
| 96 CLUSTER *Root; // ptr to root cluster of cluster tree | |
| 97 LIST ProtoList; // list of prototypes | |
| 98 uint32_t NumChar; // # of characters represented by samples | |
| 99 // cache of reusable histograms by distribution type and number of buckets. | |
| 100 BUCKETS *bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS + 1 - MINBUCKETS]; | |
| 101 }; | |
| 102 | |
| 103 struct SAMPLELIST { | |
| 104 int32_t NumSamples; // number of samples in list | |
| 105 int32_t MaxNumSamples; // maximum size of list | |
| 106 SAMPLE *Sample[1]; // array of ptrs to sample data structures | |
| 107 }; | |
| 108 | |
// Low level cluster tree analysis routines.
// InitSampleSearch(S, C) assigns to S either NIL_LIST (when C is null) or the
// list obtained by pushing C onto NIL_LIST. C is evaluated twice on the
// non-null path, so it must not have side effects.
#define InitSampleSearch(S, C) ((S) = (((C) == nullptr) ? NIL_LIST : push(NIL_LIST, (C))))
| 111 | |
| 112 /*-------------------------------------------------------------------------- | |
| 113 Public Function Prototypes | |
| 114 --------------------------------------------------------------------------*/ | |
| 115 TESS_API | |
| 116 CLUSTERER *MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]); | |
| 117 | |
| 118 TESS_API | |
| 119 SAMPLE *MakeSample(CLUSTERER *Clusterer, const float *Feature, uint32_t CharID); | |
| 120 | |
| 121 TESS_API | |
| 122 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); | |
| 123 | |
| 124 TESS_API | |
| 125 void FreeClusterer(CLUSTERER *Clusterer); | |
| 126 | |
| 127 TESS_API | |
| 128 void FreeProtoList(LIST *ProtoList); | |
| 129 | |
| 130 void FreePrototype(void *arg); // PROTOTYPE *Prototype); | |
| 131 | |
| 132 CLUSTER *NextSample(LIST *SearchState); | |
| 133 | |
| 134 float Mean(PROTOTYPE *Proto, uint16_t Dimension); | |
| 135 | |
| 136 float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension); | |
| 137 | |
| 138 TESS_API | |
| 139 int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], | |
| 140 float m1[], float m2[]); | |
| 141 | |
| 142 } // namespace tesseract | |
| 143 | |
| 144 #endif |
