comparison mupdf-source/thirdparty/tesseract/src/arch/dotproductneon.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: dotproductneon.cpp
3 // Description: Dot product function for ARM NEON.
4 // Author: Stefan Weil
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 ///////////////////////////////////////////////////////////////////////
16
17 #if defined(__ARM_NEON)
18
19 #include <arm_neon.h>
20 #include "dotproduct.h"
21
22 namespace tesseract {
23
24 // Documentation:
25 // https://developer.arm.com/architectures/instruction-sets/intrinsics/
26
27 #if defined(FAST_FLOAT) && defined(__ARM_ARCH_ISA_A64)
28
// Returns the dot product of the two n-element float vectors u and v,
// computed with AArch64 NEON intrinsics.
//
// The main loop consumes 8 elements per pass using two independent 4-lane
// accumulators; the remaining (fewer than 8) elements are added by a scalar
// loop. If n is zero or negative, neither loop runs and the result is 0.
float DotProductNEON(const float *u, const float *v, int n) {
  float32x4_t acc_lo = vdupq_n_f32(0.0f);
  float32x4_t acc_hi = vdupq_n_f32(0.0f);
  for (; n >= 8; n -= 8, u += 8, v += 8) {
    // Accumulate 8 element-wise products per iteration (2 x 4 lanes).
    const float32x4_t u_lo = vld1q_f32(u);
    const float32x4_t v_lo = vld1q_f32(v);
    const float32x4_t u_hi = vld1q_f32(u + 4);
    const float32x4_t v_hi = vld1q_f32(v + 4);
    acc_lo = vfmaq_f32(acc_lo, u_lo, v_lo);
    acc_hi = vfmaq_f32(acc_hi, u_hi, v_hi);
  }
  // Collapse each accumulator to a scalar, then combine them.
  float sum = vaddvq_f32(acc_lo) + vaddvq_f32(acc_hi);
  // Scalar tail for the elements that did not fill a group of 8.
  for (int i = 0; i < n; ++i) {
    sum += u[i] * v[i];
  }
  return sum;
}
52
53 #else
54
55 // Computes and returns the dot product of the two n-vectors u and v.
56 TFloat DotProductNEON(const TFloat *u, const TFloat *v, int n) {
57 TFloat total = 0;
58 #if defined(OPENMP_SIMD) || defined(_OPENMP)
59 #pragma omp simd reduction(+:total)
60 #endif
61 for (int k = 0; k < n; k++) {
62 total += u[k] * v[k];
63 }
64 return total;
65 }
66
67 #endif
68
69 } // namespace tesseract
70
71 #endif /* __ARM_NEON */