Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/classify/normfeat.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/classify/normfeat.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,70 @@ +/****************************************************************************** + ** Filename: normfeat.c + ** Purpose: Definition of char normalization features. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "normfeat.h" + +#include "featdefs.h" +#include "intfx.h" +#include "mfoutline.h" + +namespace tesseract { + +/** Return the length of the outline in baseline normalized form. */ +float ActualOutlineLength(FEATURE Feature) { + return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); +} + +/** + * Return the character normalization feature for a blob. + * + * The features returned are in a scale where the x-height has been + * normalized to live in the region y = [-0.25 .. 0.25]. Example ranges + * for English below are based on the Linux font collection on 2009-12-04: + * + * - Params[CharNormY] + * - The y coordinate of the grapheme's centroid. + * - English: [-0.27, 0.71] + * + * - Params[CharNormLength] + * - The length of the grapheme's outline (tiny segments discarded), + * divided by 10.0=LENGTH_COMPRESSION. + * - English: [0.16, 0.85] + * + * - Params[CharNormRx] + * - The radius of gyration about the x axis, as measured from CharNormY. + * - English: [0.011, 0.34] + * + * - Params[CharNormRy] + * - The radius of gyration about the y axis, as measured from + * the x center of the grapheme's bounding box. + * - English: [0.011, 0.31] + */ +FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info) { + auto feature_set = new FEATURE_SET_STRUCT(1); + auto feature = new FEATURE_STRUCT(&CharNormDesc); + + feature->Params[CharNormY] = MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); + feature->Params[CharNormLength] = MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; + + AddFeature(feature_set, feature); + + return feature_set; +} /* ExtractCharNormFeatures */ + +} // namespace tesseract
