comparison mupdf-source/thirdparty/tesseract/src/lstm/input.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: input.cpp
3 // Description: Input layer class for neural network implementations.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2014, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 ///////////////////////////////////////////////////////////////////////
17
18 #include "input.h"
19
20 #include <allheaders.h>
21 #include "imagedata.h"
22 #include "pageres.h"
23 #include "scrollview.h"
24
25 namespace tesseract {
26
27 // Max height for variable height inputs before scaling anyway.
28 const int kMaxInputHeight = 48;
29
30 Input::Input(const std::string &name, int ni, int no)
31 : Network(NT_INPUT, name, ni, no), cached_x_scale_(1) {}
32 Input::Input(const std::string &name, const StaticShape &shape)
33 : Network(NT_INPUT, name, shape.height(), shape.depth()), shape_(shape), cached_x_scale_(1) {
34 if (shape.height() == 1) {
35 ni_ = shape.depth();
36 }
37 }
38
39 // Writes to the given file. Returns false in case of error.
40 bool Input::Serialize(TFile *fp) const {
41 return Network::Serialize(fp) && shape_.Serialize(fp);
42 }
43
44 // Reads from the given file. Returns false in case of error.
45 bool Input::DeSerialize(TFile *fp) {
46 return shape_.DeSerialize(fp);
47 }
48
49 // Returns an integer reduction factor that the network applies to the
50 // time sequence. Assumes that any 2-d is already eliminated. Used for
51 // scaling bounding boxes of truth data.
52 int Input::XScaleFactor() const {
53 return 1;
54 }
55
56 // Provides the (minimum) x scale factor to the network (of interest only to
57 // input units) so they can determine how to scale bounding boxes.
58 void Input::CacheXScaleFactor(int factor) {
59 cached_x_scale_ = factor;
60 }
61
62 // Runs forward propagation of activations on the input line.
63 // See Network for a detailed discussion of the arguments.
64 void Input::Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose,
65 NetworkScratch *scratch, NetworkIO *output) {
66 *output = input;
67 }
68
69 // Runs backward propagation of errors on the deltas line.
70 // See NetworkCpp for a detailed discussion of the arguments.
71 bool Input::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
72 NetworkIO *back_deltas) {
73 tprintf("Input::Backward should not be called!!\n");
74 return false;
75 }
76
77 // Creates and returns a Pix of appropriate size for the network from the
78 // image_data. If non-null, *image_scale returns the image scale factor used.
79 // Returns nullptr on error.
80 /* static */
81 Image Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
82 TRand *randomizer, float *image_scale) {
83 // Note that NumInputs() is defined as input image height.
84 int target_height = network->NumInputs();
85 int width, height;
86 Image pix =
87 image_data.PreScale(target_height, kMaxInputHeight, image_scale, &width, &height, nullptr);
88 if (pix == nullptr) {
89 tprintf("Bad pix from ImageData!\n");
90 return nullptr;
91 }
92 if (width < min_width || height < min_width) {
93 tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, height, min_width);
94 pix.destroy();
95 return nullptr;
96 }
97 return pix;
98 }
99
100 // Converts the given pix to a NetworkIO of height and depth appropriate to the
101 // given StaticShape:
102 // If depth == 3, convert to 24 bit color, otherwise normalized grey.
103 // Scale to target height, if the shape's height is > 1, or its depth if the
104 // height == 1. If height == 0 then no scaling.
105 // NOTE: It isn't safe for multiple threads to call this on the same pix.
106 /* static */
107 void Input::PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer,
108 NetworkIO *input) {
109 bool color = shape.depth() == 3;
110 Image var_pix = pix;
111 int depth = pixGetDepth(var_pix);
112 Image normed_pix = nullptr;
113 // On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without
114 // colormap, so we just have to deal with depth conversion here.
115 if (color) {
116 // Force RGB.
117 if (depth == 32) {
118 normed_pix = var_pix.clone();
119 } else {
120 normed_pix = pixConvertTo32(var_pix);
121 }
122 } else {
123 // Convert non-8-bit images to 8 bit.
124 if (depth == 8) {
125 normed_pix = var_pix.clone();
126 } else {
127 normed_pix = pixConvertTo8(var_pix, false);
128 }
129 }
130 int height = pixGetHeight(normed_pix);
131 int target_height = shape.height();
132 if (target_height == 1) {
133 target_height = shape.depth();
134 }
135 if (target_height != 0 && target_height != height) {
136 // Get the scaled image.
137 float im_factor = static_cast<float>(target_height) / height;
138 Image scaled_pix = pixScale(normed_pix, im_factor, im_factor);
139 normed_pix.destroy();
140 normed_pix = scaled_pix;
141 }
142 input->FromPix(shape, normed_pix, randomizer);
143 normed_pix.destroy();
144 }
145
146 } // namespace tesseract.