Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/lstm/functions.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: functions.h | |
| 3 // Description: Collection of function-objects used by the network layers. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2014, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 /////////////////////////////////////////////////////////////////////// | |
| 17 | |
| 18 #ifndef TESSERACT_LSTM_FUNCTIONS_H_ | |
| 19 #define TESSERACT_LSTM_FUNCTIONS_H_ | |
| 20 | |
| 21 #include "helpers.h" | |
| 22 #include "tesstypes.h" | |
| 23 | |
| 24 // Setting this to 1 or more causes massive dumps of debug data: weights, | |
| 25 // updates, internal calculations etc, and reduces the number of test iterations | |
| 26 // to a small number, so outputs can be diffed. | |
| 27 #define DEBUG_DETAIL 0 | |
| 28 #if DEBUG_DETAIL > 0 | |
| 29 # undef _OPENMP // Disable open mp to get the outputs in sync. | |
| 30 #endif | |
| 31 | |
| 32 namespace tesseract { | |
| 33 | |
| 34 // Size of static tables. | |
| 35 constexpr int kTableSize = 4096; | |
| 36 // Scale factor for float arg to int index. | |
| 37 constexpr TFloat kScaleFactor = 256.0; | |
| 38 | |
| 39 // Generated lookup tables. | |
| 40 extern const TFloat TanhTable[]; | |
| 41 extern const TFloat LogisticTable[]; | |
| 42 | |
| 43 // Non-linearity (sigmoid) functions with cache tables and clipping. | |
| 44 inline TFloat Tanh(TFloat x) { | |
| 45 if (x < 0) { | |
| 46 return -Tanh(-x); | |
| 47 } | |
| 48 x *= kScaleFactor; | |
| 49 auto index = static_cast<unsigned>(x); | |
| 50 if (index >= (kTableSize - 1)) { | |
| 51 return 1; | |
| 52 } | |
| 53 TFloat tanh_i0 = TanhTable[index]; | |
| 54 TFloat tanh_i1 = TanhTable[index + 1]; | |
| 55 // Linear interpolation. | |
| 56 return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index); | |
| 57 } | |
| 58 | |
| 59 inline TFloat Logistic(TFloat x) { | |
| 60 if (x < 0) { | |
| 61 return 1 - Logistic(-x); | |
| 62 } | |
| 63 x *= kScaleFactor; | |
| 64 auto index = static_cast<unsigned>(x); | |
| 65 if (index >= (kTableSize - 1)) { | |
| 66 return 1; | |
| 67 } | |
| 68 TFloat l0 = LogisticTable[index]; | |
| 69 TFloat l1 = LogisticTable[index + 1]; | |
| 70 // Linear interpolation. | |
| 71 return l0 + (l1 - l0) * (x - index); | |
| 72 } | |
| 73 | |
| 74 // Non-linearity (sigmoid) functions and their derivatives. | |
| 75 struct FFunc { | |
| 76 inline TFloat operator()(TFloat x) const { | |
| 77 return Logistic(x); | |
| 78 } | |
| 79 }; | |
| 80 struct FPrime { | |
| 81 inline TFloat operator()(TFloat y) const { | |
| 82 return y * (1 - y); | |
| 83 } | |
| 84 }; | |
| 85 struct ClipFFunc { | |
| 86 inline TFloat operator()(TFloat x) const { | |
| 87 if (x <= 0) { | |
| 88 return 0; | |
| 89 } | |
| 90 if (x >= 1) { | |
| 91 return 1; | |
| 92 } | |
| 93 return x; | |
| 94 } | |
| 95 }; | |
| 96 struct ClipFPrime { | |
| 97 inline TFloat operator()(TFloat y) const { | |
| 98 return 0 < y && y < 1 ? 1 : 0; | |
| 99 } | |
| 100 }; | |
| 101 struct Relu { | |
| 102 inline TFloat operator()(TFloat x) const { | |
| 103 if (x <= 0) { | |
| 104 return 0; | |
| 105 } | |
| 106 return x; | |
| 107 } | |
| 108 }; | |
| 109 struct ReluPrime { | |
| 110 inline TFloat operator()(TFloat y) const { | |
| 111 return 0 < y ? 1 : 0; | |
| 112 } | |
| 113 }; | |
| 114 struct GFunc { | |
| 115 inline TFloat operator()(TFloat x) const { | |
| 116 return Tanh(x); | |
| 117 } | |
| 118 }; | |
| 119 struct GPrime { | |
| 120 inline TFloat operator()(TFloat y) const { | |
| 121 return 1 - y * y; | |
| 122 } | |
| 123 }; | |
| 124 struct ClipGFunc { | |
| 125 inline TFloat operator()(TFloat x) const { | |
| 126 if (x <= -1) { | |
| 127 return -1; | |
| 128 } | |
| 129 if (x >= 1) { | |
| 130 return 1; | |
| 131 } | |
| 132 return x; | |
| 133 } | |
| 134 }; | |
| 135 struct ClipGPrime { | |
| 136 inline TFloat operator()(TFloat y) const { | |
| 137 return -1 < y && y < 1 ? 1 : 0; | |
| 138 } | |
| 139 }; | |
| 140 struct HFunc { | |
| 141 inline TFloat operator()(TFloat x) const { | |
| 142 return Tanh(x); | |
| 143 } | |
| 144 }; | |
| 145 struct HPrime { | |
| 146 inline TFloat operator()(TFloat y) const { | |
| 147 TFloat u = Tanh(y); | |
| 148 return 1 - u * u; | |
| 149 } | |
| 150 }; | |
| 151 struct UnityFunc { | |
| 152 inline TFloat operator()(TFloat /*x*/) const { | |
| 153 return 1.0; | |
| 154 } | |
| 155 }; | |
| 156 struct IdentityFunc { | |
| 157 inline TFloat operator()(TFloat x) const { | |
| 158 return x; | |
| 159 } | |
| 160 }; | |
| 161 | |
| 162 // Applies Func in-place to inout, of size n. | |
| 163 template <class Func> | |
| 164 inline void FuncInplace(int n, TFloat *inout) { | |
| 165 Func f; | |
| 166 for (int i = 0; i < n; ++i) { | |
| 167 inout[i] = f(inout[i]); | |
| 168 } | |
| 169 } | |
| 170 // Applies Func to u and multiplies the result by v component-wise, | |
| 171 // putting the product in out, all of size n. | |
| 172 template <class Func> | |
| 173 inline void FuncMultiply(const TFloat *u, const TFloat *v, int n, TFloat *out) { | |
| 174 Func f; | |
| 175 for (int i = 0; i < n; ++i) { | |
| 176 out[i] = f(u[i]) * v[i]; | |
| 177 } | |
| 178 } | |
// Replaces the n values of inout with their softmax, in place.
// The maximum is subtracted before exponentiation and the shifted value is
// clipped to [-kMaxSoftmaxActivation, 0]. Does nothing when n <= 0.
template <typename T>
inline void SoftmaxInPlace(int n, T *inout) {
  if (n <= 0) {
    return;
  }
  // A limit on the negative range input to exp to guarantee non-zero output.
  const T kMaxSoftmaxActivation = 86;

  // Pass 1: find the largest activation.
  T peak = inout[0];
  for (int k = 1; k < n; ++k) {
    const T candidate = inout[k];
    if (candidate > peak) {
      peak = candidate;
    }
  }

  // Pass 2: exponentiate the shifted, clipped activations and total them.
  T total = 0;
  for (int k = 0; k < n; ++k) {
    const T shifted = inout[k] - peak;
    const T weight = std::exp(ClipToRange(shifted, -kMaxSoftmaxActivation, static_cast<T>(0)));
    total += weight;
    inout[k] = weight;
  }

  // Pass 3: normalize, unless the total is not strictly positive.
  if (!(total > 0)) {
    return;
  }
  for (int k = 0; k < n; ++k) {
    inout[k] /= total;
  }
}
| 208 | |
| 209 // Copies n values of the given src vector to dest. | |
| 210 inline void CopyVector(unsigned n, const TFloat *src, TFloat *dest) { | |
| 211 memcpy(dest, src, n * sizeof(dest[0])); | |
| 212 } | |
| 213 | |
| 214 // Adds n values of the given src vector to dest. | |
| 215 inline void AccumulateVector(int n, const TFloat *src, TFloat *dest) { | |
| 216 for (int i = 0; i < n; ++i) { | |
| 217 dest[i] += src[i]; | |
| 218 } | |
| 219 } | |
| 220 | |
| 221 // Multiplies n values of inout in-place element-wise by the given src vector. | |
| 222 inline void MultiplyVectorsInPlace(int n, const TFloat *src, TFloat *inout) { | |
| 223 for (int i = 0; i < n; ++i) { | |
| 224 inout[i] *= src[i]; | |
| 225 } | |
| 226 } | |
| 227 | |
| 228 // Multiplies n values of u by v, element-wise, accumulating to out. | |
| 229 inline void MultiplyAccumulate(int n, const TFloat *u, const TFloat *v, TFloat *out) { | |
| 230 for (int i = 0; i < n; i++) { | |
| 231 out[i] += u[i] * v[i]; | |
| 232 } | |
| 233 } | |
| 234 | |
| 235 // Sums the given 5 n-vectors putting the result into sum. | |
| 236 inline void SumVectors(int n, const TFloat *v1, const TFloat *v2, const TFloat *v3, | |
| 237 const TFloat *v4, const TFloat *v5, TFloat *sum) { | |
| 238 for (int i = 0; i < n; ++i) { | |
| 239 sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i]; | |
| 240 } | |
| 241 } | |
| 242 | |
// Fills the first n elements of vec with zero bytes using memset.
template <typename T>
inline void ZeroVector(unsigned n, T *vec) {
  const auto byte_count = n * sizeof(T);
  memset(vec, 0, byte_count);
}
| 248 | |
// Replaces each of the first n elements of vec with the result of
// ClipToRange(element, lower, upper). Does nothing when n <= 0.
template <typename T>
inline void ClipVector(int n, T lower, T upper, T *vec) {
  int pos = 0;
  while (pos < n) {
    T &element = vec[pos];
    element = ClipToRange(element, lower, upper);
    ++pos;
  }
}
| 256 | |
| 257 // Converts the given n-vector to a binary encoding of the maximum value, | |
| 258 // encoded as vector of nf binary values. | |
| 259 inline void CodeInBinary(int n, int nf, TFloat *vec) { | |
| 260 if (nf <= 0 || n < nf) { | |
| 261 return; | |
| 262 } | |
| 263 int index = 0; | |
| 264 TFloat best_score = vec[0]; | |
| 265 for (int i = 1; i < n; ++i) { | |
| 266 if (vec[i] > best_score) { | |
| 267 best_score = vec[i]; | |
| 268 index = i; | |
| 269 } | |
| 270 } | |
| 271 int mask = 1; | |
| 272 for (int i = 0; i < nf; ++i, mask *= 2) { | |
| 273 vec[i] = (index & mask) ? 1.0 : 0.0; | |
| 274 } | |
| 275 } | |
| 276 | |
| 277 } // namespace tesseract. | |
| 278 | |
| 279 #endif // TESSERACT_LSTM_FUNCTIONS_H_ |
