Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/arch/intsimdmatrix.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/arch/intsimdmatrix.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,119 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrix.cpp +// Description: Base class for 8-bit int SIMD matrix multipliers. +// Author: Ray Smith +// +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#include "intsimdmatrix.h" +#include "matrix.h" // for GENERIC_2D_ARRAY +#include "simddetect.h" // for SIMDDetect + +namespace tesseract { + +const IntSimdMatrix *IntSimdMatrix::intSimdMatrix = nullptr; + +// Computes a reshaped copy of the weight matrix w. +void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t> &shaped_w, + int32_t &rounded_num_out) const { + const int num_out = w.dim1(); + const int num_in = w.dim2() - 1; + // The rounded-up sizes of the reshaped weight matrix, excluding biases. + int rounded_num_in = Roundup(num_in, num_inputs_per_group_); + rounded_num_out = RoundOutputs(num_out); + // Add the bias and compute the required size. + shaped_w.resize((rounded_num_in + 1) * rounded_num_out, 0); + int shaped_index = 0; + int output = 0; + // Each number of registers needs a different format! Iterates over the + // different numbers of registers (each a power of 2). + for (int num_registers = max_output_registers_; num_registers >= 1; num_registers /= 2) { + // The number of outputs that we will generate with this many registers. + int num_outputs_per_register_set = num_registers * num_outputs_per_register_; + // Use the max number of registers until we have to go fewer. + while (output + num_outputs_per_register_set <= rounded_num_out) { + // Accumulating outputs in registers saves iterating over the inputs, so + // we only have to do it once per output register set. + for (int input = 0; input < num_in; input += num_inputs_per_group_) { + // Iterate over the number of outputs in a register set. + for (int j = 0; j < num_outputs_per_register_set; ++j) { + // Inner-most loop corresponds to the number of inputs in an input + // group. + for (int i = 0; i < num_inputs_per_group_; ++i) { + int8_t weight = 0; + if (output + j < num_out && input + i < num_in) { + weight = w(output + j, input + i); + } + shaped_w[shaped_index++] = weight; + } + } + } + // Append the bias weights for the register set. + for (int j = 0; j < num_outputs_per_register_set; ++j) { + int8_t weight = 0; + if (output + j < num_out) { + weight = w(output + j, num_in); + } + shaped_w[shaped_index++] = weight; + } + output += num_outputs_per_register_set; + } + } +} + +// Computes matrix.vector v = Wu. +// u is of size W.dim2() - 1 and the output v is of size W.dim1(). +// u is imagined to have an extra element at the end with value 1, to +// implement the bias, but it doesn't actually have it. +void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, + const std::vector<TFloat> &scales, const int8_t *u, TFloat *v) { + int num_out = w.dim1(); + int num_in = w.dim2() - 1; + // Base implementation. + int i; + // Break up into chunks of four to facilitate vectorization + for (i = 0; i < (num_out / 4) * 4; i += 4) { + const int8_t *wi0 = w[i + 0]; + const int8_t *wi1 = w[i + 1]; + const int8_t *wi2 = w[i + 2]; + const int8_t *wi3 = w[i + 3]; + int total0 = 0; + int total1 = 0; + int total2 = 0; + int total3 = 0; + for (int j = 0; j < num_in; ++j) { + total0 += wi0[j] * u[j]; + total1 += wi1[j] * u[j]; + total2 += wi2[j] * u[j]; + total3 += wi3[j] * u[j]; + } + // Add in the bias and correct for integer values. + v[i + 0] = (total0 + wi0[num_in] * INT8_MAX) * scales[i + 0]; + v[i + 1] = (total1 + wi1[num_in] * INT8_MAX) * scales[i + 1]; + v[i + 2] = (total2 + wi2[num_in] * INT8_MAX) * scales[i + 2]; + v[i + 3] = (total3 + wi3[num_in] * INT8_MAX) * scales[i + 3]; + } + + // Capture the remainder mod four + for (; i < num_out; ++i) { + const int8_t *wi = w[i]; + int total = 0; + for (int j = 0; j < num_in; ++j) { + total += wi[j] * u[j]; + } + // Add in the bias and correct for integer values. + v[i] = (total + wi[num_in] * INT8_MAX) * scales[i]; + } +} + +} // namespace tesseract
