Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/linlsq.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: linlsq.cpp (Formerly llsq.c) | |
| 3 * Description: Linear Least squares fitting code. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1991, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #include "linlsq.h" | |
| 20 #include <cmath> // for std::sqrt | |
| 21 #include <cstdio> | |
| 22 #include "errcode.h" | |
| 23 | |
| 24 namespace tesseract { | |
| 25 | |
| 26 constexpr ERRCODE EMPTY_LLSQ("Can't delete from an empty LLSQ"); | |
| 27 | |
| 28 /********************************************************************** | |
| 29 * LLSQ::clear | |
| 30 * | |
| 31 * Function to initialize a LLSQ. | |
| 32 **********************************************************************/ | |
| 33 | |
| 34 void LLSQ::clear() { // initialize | |
| 35 total_weight = 0.0; // no elements | |
| 36 sigx = 0.0; // update accumulators | |
| 37 sigy = 0.0; | |
| 38 sigxx = 0.0; | |
| 39 sigxy = 0.0; | |
| 40 sigyy = 0.0; | |
| 41 } | |
| 42 | |
| 43 /********************************************************************** | |
| 44 * LLSQ::add | |
| 45 * | |
| 46 * Add an element to the accumulator. | |
| 47 **********************************************************************/ | |
| 48 | |
| 49 void LLSQ::add(double x, double y) { // add an element | |
| 50 total_weight++; // count elements | |
| 51 sigx += x; // update accumulators | |
| 52 sigy += y; | |
| 53 sigxx += x * x; | |
| 54 sigxy += x * y; | |
| 55 sigyy += y * y; | |
| 56 } | |
| 57 // Adds an element with a specified weight. | |
| 58 void LLSQ::add(double x, double y, double weight) { | |
| 59 total_weight += weight; | |
| 60 sigx += x * weight; // update accumulators | |
| 61 sigy += y * weight; | |
| 62 sigxx += x * x * weight; | |
| 63 sigxy += x * y * weight; | |
| 64 sigyy += y * y * weight; | |
| 65 } | |
| 66 // Adds a whole LLSQ. | |
| 67 void LLSQ::add(const LLSQ &other) { | |
| 68 total_weight += other.total_weight; | |
| 69 sigx += other.sigx; // update accumulators | |
| 70 sigy += other.sigy; | |
| 71 sigxx += other.sigxx; | |
| 72 sigxy += other.sigxy; | |
| 73 sigyy += other.sigyy; | |
| 74 } | |
| 75 | |
| 76 /********************************************************************** | |
| 77 * LLSQ::remove | |
| 78 * | |
| 79 * Delete an element from the acculuator. | |
| 80 **********************************************************************/ | |
| 81 | |
| 82 void LLSQ::remove(double x, double y) { // delete an element | |
| 83 if (total_weight <= 0.0) { // illegal | |
| 84 EMPTY_LLSQ.error("LLSQ::remove", ABORT); | |
| 85 } | |
| 86 total_weight--; // count elements | |
| 87 sigx -= x; // update accumulators | |
| 88 sigy -= y; | |
| 89 sigxx -= x * x; | |
| 90 sigxy -= x * y; | |
| 91 sigyy -= y * y; | |
| 92 } | |
| 93 | |
| 94 /********************************************************************** | |
| 95 * LLSQ::m | |
| 96 * | |
| 97 * Return the gradient of the line fit. | |
| 98 **********************************************************************/ | |
| 99 | |
| 100 double LLSQ::m() const { // get gradient | |
| 101 double covar = covariance(); | |
| 102 double x_var = x_variance(); | |
| 103 if (x_var != 0.0) { | |
| 104 return covar / x_var; | |
| 105 } else { | |
| 106 return 0.0; // too little | |
| 107 } | |
| 108 } | |
| 109 | |
| 110 /********************************************************************** | |
| 111 * LLSQ::c | |
| 112 * | |
| 113 * Return the constant of the line fit. | |
| 114 **********************************************************************/ | |
| 115 | |
| 116 double LLSQ::c(double m) const { // get constant | |
| 117 if (total_weight > 0.0) { | |
| 118 return (sigy - m * sigx) / total_weight; | |
| 119 } else { | |
| 120 return 0; // too little | |
| 121 } | |
| 122 } | |
| 123 | |
| 124 /********************************************************************** | |
| 125 * LLSQ::rms | |
| 126 * | |
| 127 * Return the rms error of the fit. | |
| 128 **********************************************************************/ | |
| 129 | |
| 130 double LLSQ::rms(double m, double c) const { // get error | |
| 131 double error; // total error | |
| 132 | |
| 133 if (total_weight > 0) { | |
| 134 error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (total_weight * c - 2 * sigy); | |
| 135 if (error >= 0) { | |
| 136 error = std::sqrt(error / total_weight); // sqrt of mean | |
| 137 } else { | |
| 138 error = 0; | |
| 139 } | |
| 140 } else { | |
| 141 error = 0; // too little | |
| 142 } | |
| 143 return error; | |
| 144 } | |
| 145 | |
| 146 /********************************************************************** | |
| 147 * LLSQ::pearson | |
| 148 * | |
| 149 * Return the pearson product moment correlation coefficient. | |
| 150 **********************************************************************/ | |
| 151 | |
| 152 double LLSQ::pearson() const { // get correlation | |
| 153 double r = 0.0; // Correlation is 0 if insufficient data. | |
| 154 | |
| 155 double covar = covariance(); | |
| 156 if (covar != 0.0) { | |
| 157 double var_product = x_variance() * y_variance(); | |
| 158 if (var_product > 0.0) { | |
| 159 r = covar / std::sqrt(var_product); | |
| 160 } | |
| 161 } | |
| 162 return r; | |
| 163 } | |
| 164 | |
| 165 // Returns the x,y means as an FCOORD. | |
| 166 FCOORD LLSQ::mean_point() const { | |
| 167 if (total_weight > 0.0) { | |
| 168 return FCOORD(sigx / total_weight, sigy / total_weight); | |
| 169 } else { | |
| 170 return FCOORD(0.0f, 0.0f); | |
| 171 } | |
| 172 } | |
| 173 | |
| 174 // Returns the sqrt of the mean squared error measured perpendicular from the | |
| 175 // line through mean_point() in the direction dir. | |
| 176 // | |
| 177 // Derivation: | |
| 178 // Lemma: Let v and x_i (i=1..N) be a k-dimensional vectors (1xk matrices). | |
| 179 // Let % be dot product and ' be transpose. Note that: | |
| 180 // Sum[i=1..N] (v % x_i)^2 | |
| 181 // = v * [x_1' x_2' ... x_N'] * [x_1' x_2' .. x_N']' * v' | |
| 182 // If x_i have average 0 we have: | |
| 183 // = v * (N * COVARIANCE_MATRIX(X)) * v' | |
| 184 // Expanded for the case that k = 2, where we treat the dimensions | |
| 185 // as x_i and y_i, this is: | |
| 186 // = v * (N * [VAR(X), COV(X,Y); COV(X,Y) VAR(Y)]) * v' | |
| 187 // Now, we are trying to calculate the mean squared error, where v is | |
| 188 // perpendicular to our line of interest: | |
| 189 // Mean squared error | |
| 190 // = E [ (v % (x_i - x_avg))) ^2 ] | |
| 191 // = Sum (v % (x_i - x_avg))^2 / N | |
| 192 // = v * N * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] / N * v' | |
| 193 // = v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v' | |
| 194 // = code below | |
| 195 double LLSQ::rms_orth(const FCOORD &dir) const { | |
| 196 FCOORD v = !dir; | |
| 197 v.normalise(); | |
| 198 return std::sqrt(x_variance() * v.x() * v.x() + 2 * covariance() * v.x() * v.y() + | |
| 199 y_variance() * v.y() * v.y()); | |
| 200 } | |
| 201 | |
| 202 // Returns the direction of the fitted line as a unit vector, using the | |
| 203 // least mean squared perpendicular distance. The line runs through the | |
| 204 // mean_point, i.e. a point p on the line is given by: | |
| 205 // p = mean_point() + lambda * vector_fit() for some real number lambda. | |
| 206 // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous | |
| 207 // and may be negated without changing its meaning. | |
| 208 // Fitting a line m + ๐v to a set of N points Pi = (xi, yi), where | |
| 209 // m is the mean point (๐, ๐) and | |
| 210 // v is the direction vector (cos๐, sin๐) | |
| 211 // The perpendicular distance of each Pi from the line is: | |
| 212 // (Pi - m) x v, where x is the scalar cross product. | |
| 213 // Total squared error is thus: | |
| 214 // E = โ((xi - ๐)sin๐ - (yi - ๐)cos๐)ยฒ | |
| 215 // = โ(xi - ๐)ยฒsinยฒ๐ - 2โ(xi - ๐)(yi - ๐)sin๐ cos๐ + โ(yi - ๐)ยฒcosยฒ๐ | |
| 216 // = NVar(xi)sinยฒ๐ - 2NCovar(xi, yi)sin๐ cos๐ + NVar(yi)cosยฒ๐ (Eq 1) | |
| 217 // where Var(xi) is the variance of xi, | |
| 218 // and Covar(xi, yi) is the covariance of xi, yi. | |
| 219 // Taking the derivative wrt ๐ and setting to 0 to obtain the min/max: | |
| 220 // 0 = 2NVar(xi)sin๐ cos๐ -2NCovar(xi, yi)(cosยฒ๐ - sinยฒ๐) -2NVar(yi)sin๐ cos๐ | |
| 221 // => Covar(xi, yi)(cosยฒ๐ - sinยฒ๐) = (Var(xi) - Var(yi))sin๐ cos๐ | |
| 222 // Using double angles: | |
| 223 // 2Covar(xi, yi)cos2๐ = (Var(xi) - Var(yi))sin2๐ (Eq 2) | |
| 224 // So ๐ = 0.5 atan2(2Covar(xi, yi), Var(xi) - Var(yi)) (Eq 3) | |
| 225 | |
| 226 // Because it involves 2๐ , Eq 2 has 2 solutions 90 degrees apart, but which | |
| 227 // is the min and which is the max? From Eq1: | |
| 228 // E/N = Var(xi)sinยฒ๐ - 2Covar(xi, yi)sin๐ cos๐ + Var(yi)cosยฒ๐ | |
| 229 // and 90 degrees away, using sin/cos equivalences: | |
| 230 // E'/N = Var(xi)cosยฒ๐ + 2Covar(xi, yi)sin๐ cos๐ + Var(yi)sinยฒ๐ | |
| 231 // The second error is smaller (making it the minimum) iff | |
| 232 // E'/N < E/N ie: | |
| 233 // (Var(xi) - Var(yi))(cosยฒ๐ - sinยฒ๐) < -4Covar(xi, yi)sin๐ cos๐ | |
| 234 // Using double angles: | |
| 235 // (Var(xi) - Var(yi))cos2๐ < -2Covar(xi, yi)sin2๐ (InEq 1) | |
| 236 // But atan2(2Covar(xi, yi), Var(xi) - Var(yi)) picks 2๐ such that: | |
| 237 // sgn(cos2๐) = sgn(Var(xi) - Var(yi)) and sgn(sin2๐) = sgn(Covar(xi, yi)) | |
| 238 // so InEq1 can *never* be true, making the atan2 result *always* the min! | |
| 239 // In the degenerate case, where Covar(xi, yi) = 0 AND Var(xi) = Var(yi), | |
| 240 // the 2 solutions have equal error and the inequality is still false. | |
| 241 // Therefore the solution really is as trivial as Eq 3. | |
| 242 | |
| 243 // This is equivalent to returning the Principal Component in PCA, or the | |
| 244 // eigenvector corresponding to the largest eigenvalue in the covariance | |
| 245 // matrix. However, atan2 is much simpler! The one reference I found that | |
| 246 // uses this formula is http://web.mit.edu/18.06/www/Essays/tlsfit.pdf but | |
| 247 // that is still a much more complex derivation. It seems Pearson had already | |
| 248 // found this simple solution in 1901. | |
| 249 // http://books.google.com/books?id=WXwvAQAAIAAJ&pg=PA559 | |
| 250 FCOORD LLSQ::vector_fit() const { | |
| 251 double x_var = x_variance(); | |
| 252 double y_var = y_variance(); | |
| 253 double covar = covariance(); | |
| 254 double theta = 0.5 * atan2(2.0 * covar, x_var - y_var); | |
| 255 FCOORD result(cos(theta), sin(theta)); | |
| 256 return result; | |
| 257 } | |
| 258 | |
| 259 } // namespace tesseract |
