comparison mupdf-source/thirdparty/tesseract/src/arch/intsimdmatrix.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: intsimdmatrix.cpp
3 // Description: Base class for 8-bit int SIMD matrix multipliers.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2017, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 ///////////////////////////////////////////////////////////////////////
17
18 #include "intsimdmatrix.h"
19 #include "matrix.h" // for GENERIC_2D_ARRAY
20 #include "simddetect.h" // for SIMDDetect
21
22 namespace tesseract {
23
// Out-of-class definition of the static member IntSimdMatrix::intSimdMatrix.
// It starts out as nullptr; nothing in this file assigns it afterwards.
24 const IntSimdMatrix *IntSimdMatrix::intSimdMatrix = nullptr;
25
26 // Computes a reshaped copy of the weight matrix w.
27 void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t> &shaped_w,
28 int32_t &rounded_num_out) const {
29 const int num_out = w.dim1();
30 const int num_in = w.dim2() - 1;
31 // The rounded-up sizes of the reshaped weight matrix, excluding biases.
32 int rounded_num_in = Roundup(num_in, num_inputs_per_group_);
33 rounded_num_out = RoundOutputs(num_out);
34 // Add the bias and compute the required size.
35 shaped_w.resize((rounded_num_in + 1) * rounded_num_out, 0);
36 int shaped_index = 0;
37 int output = 0;
38 // Each number of registers needs a different format! Iterates over the
39 // different numbers of registers (each a power of 2).
40 for (int num_registers = max_output_registers_; num_registers >= 1; num_registers /= 2) {
41 // The number of outputs that we will generate with this many registers.
42 int num_outputs_per_register_set = num_registers * num_outputs_per_register_;
43 // Use the max number of registers until we have to go fewer.
44 while (output + num_outputs_per_register_set <= rounded_num_out) {
45 // Accumulating outputs in registers saves iterating over the inputs, so
46 // we only have to do it once per output register set.
47 for (int input = 0; input < num_in; input += num_inputs_per_group_) {
48 // Iterate over the number of outputs in a register set.
49 for (int j = 0; j < num_outputs_per_register_set; ++j) {
50 // Inner-most loop corresponds to the number of inputs in an input
51 // group.
52 for (int i = 0; i < num_inputs_per_group_; ++i) {
53 int8_t weight = 0;
54 if (output + j < num_out && input + i < num_in) {
55 weight = w(output + j, input + i);
56 }
57 shaped_w[shaped_index++] = weight;
58 }
59 }
60 }
61 // Append the bias weights for the register set.
62 for (int j = 0; j < num_outputs_per_register_set; ++j) {
63 int8_t weight = 0;
64 if (output + j < num_out) {
65 weight = w(output + j, num_in);
66 }
67 shaped_w[shaped_index++] = weight;
68 }
69 output += num_outputs_per_register_set;
70 }
71 }
72 }
73
74 // Computes matrix.vector v = Wu.
75 // u is of size W.dim2() - 1 and the output v is of size W.dim1().
76 // u is imagined to have an extra element at the end with value 1, to
77 // implement the bias, but it doesn't actually have it.
78 void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
79 const std::vector<TFloat> &scales, const int8_t *u, TFloat *v) {
80 int num_out = w.dim1();
81 int num_in = w.dim2() - 1;
82 // Base implementation.
83 int i;
84 // Break up into chunks of four to facilitate vectorization
85 for (i = 0; i < (num_out / 4) * 4; i += 4) {
86 const int8_t *wi0 = w[i + 0];
87 const int8_t *wi1 = w[i + 1];
88 const int8_t *wi2 = w[i + 2];
89 const int8_t *wi3 = w[i + 3];
90 int total0 = 0;
91 int total1 = 0;
92 int total2 = 0;
93 int total3 = 0;
94 for (int j = 0; j < num_in; ++j) {
95 total0 += wi0[j] * u[j];
96 total1 += wi1[j] * u[j];
97 total2 += wi2[j] * u[j];
98 total3 += wi3[j] * u[j];
99 }
100 // Add in the bias and correct for integer values.
101 v[i + 0] = (total0 + wi0[num_in] * INT8_MAX) * scales[i + 0];
102 v[i + 1] = (total1 + wi1[num_in] * INT8_MAX) * scales[i + 1];
103 v[i + 2] = (total2 + wi2[num_in] * INT8_MAX) * scales[i + 2];
104 v[i + 3] = (total3 + wi3[num_in] * INT8_MAX) * scales[i + 3];
105 }
106
107 // Capture the remainder mod four
108 for (; i < num_out; ++i) {
109 const int8_t *wi = w[i];
110 int total = 0;
111 for (int j = 0; j < num_in; ++j) {
112 total += wi[j] * u[j];
113 }
114 // Add in the bias and correct for integer values.
115 v[i] = (total + wi[num_in] * INT8_MAX) * scales[i];
116 }
117 }
118
119 } // namespace tesseract