comparison mupdf-source/thirdparty/tesseract/src/wordrec/params_model.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: params_model.h
3 // Description: Trained feature serialization for language parameter training.
4 // Author: David Eger
5 //
6 // (C) Copyright 2011, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #ifndef TESSERACT_WORDREC_PARAMS_MODEL_H_
20 #define TESSERACT_WORDREC_PARAMS_MODEL_H_
21
22 #include <tesseract/export.h> // for TESS_API
23 #include "params_training_featdef.h" // for PTRAIN_NUM_FEATURE_TYPES
24
25 namespace tesseract {
26
27 class TFile;
28
29 // Represents the learned weights for a given language.
30 class TESS_API ParamsModel {
31 public:
32 // Enum for expressing OCR pass.
33 enum PassEnum {
34 PTRAIN_PASS1,
35 PTRAIN_PASS2,
36
37 PTRAIN_NUM_PASSES
38 };
39
40 ParamsModel() : pass_(PTRAIN_PASS1) {}
41 ParamsModel(const char *lang, const std::vector<float> &weights)
42 : lang_(lang), pass_(PTRAIN_PASS1) {
43 weights_vec_[pass_] = weights;
44 }
45 inline bool Initialized() {
46 return weights_vec_[pass_].size() == PTRAIN_NUM_FEATURE_TYPES;
47 }
48 // Prints out feature weights.
49 void Print();
50 // Clears weights for all passes.
51 void Clear() {
52 for (auto &p : weights_vec_) {
53 p.clear();
54 }
55 }
56 // Copies the weights of the given params model.
57 void Copy(const ParamsModel &other_model);
58 // Applies params model weights to the given features.
59 // Assumes that features is an array of size PTRAIN_NUM_FEATURE_TYPES.
60 float ComputeCost(const float features[]) const;
61 bool Equivalent(const ParamsModel &that) const;
62
63 // Returns true on success.
64 bool SaveToFile(const char *full_path) const;
65
66 // Returns true on success.
67 bool LoadFromFp(const char *lang, TFile *fp);
68
69 const std::vector<float> &weights() const {
70 return weights_vec_[pass_];
71 }
72 const std::vector<float> &weights_for_pass(PassEnum pass) const {
73 return weights_vec_[pass];
74 }
75 void SetPass(PassEnum pass) {
76 pass_ = pass;
77 }
78
79 private:
80 bool ParseLine(char *line, char **key, float *val);
81
82 std::string lang_;
83 // Set to the current pass type and used to determine which set of weights
84 // should be used for ComputeCost() and other functions.
85 PassEnum pass_;
86 // Several sets of weights for various OCR passes (e.g. pass1 with adaption,
87 // pass2 without adaption, etc).
88 std::vector<float> weights_vec_[PTRAIN_NUM_PASSES];
89 };
90
91 } // namespace tesseract
92
93 #endif // TESSERACT_WORDREC_PARAMS_MODEL_H_