Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/thirdparty/tesseract/src/classify/normmatch.cpp @ 7:5ab937c03c27
Apply full RELRO to all generated binaries.
Also strip the generated binaries.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 16 Sep 2025 12:37:32 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
/****************************************************************************** ** Filename: normmatch.c ** Purpose: Simple matcher based on character normalization features. ** Author: Dan Johnson ** ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ #include "normmatch.h" #include "classify.h" #include "clusttool.h" #include "helpers.h" #include "normfeat.h" #include "params.h" #include "unicharset.h" #include <cmath> #include <cstdio> #include <sstream> // for std::istringstream namespace tesseract { struct NORM_PROTOS { NORM_PROTOS(size_t n) : NumProtos(n), Protos(n) { } int NumParams = 0; int NumProtos; PARAM_DESC *ParamDesc = nullptr; std::vector<LIST> Protos; }; /*---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------*/ /** * @name NormEvidenceOf * * Return the new type of evidence number corresponding to this * normalization adjustment. The equation that represents the transform is: * 1 / (1 + (NormAdj / midpoint) ^ curl) */ static float NormEvidenceOf(float NormAdj) { NormAdj /= static_cast<float>(classify_norm_adj_midpoint); if (classify_norm_adj_curl == 3) { NormAdj = NormAdj * NormAdj * NormAdj; } else if (classify_norm_adj_curl == 2) { NormAdj = NormAdj * NormAdj; } else { NormAdj = std::pow(NormAdj, static_cast<float>(classify_norm_adj_curl)); } return (1 / (1 + NormAdj)); } /*---------------------------------------------------------------------------- Variables ----------------------------------------------------------------------------*/ /** control knobs used to control the normalization adjustment process */ double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); /** Weight of width variance against height and vertical position. */ const float kWidthErrorWeighting = 0.125f; /*---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------*/ /** * This routine compares Features against each character * normalization proto for ClassId and returns the match * rating of the best match. * @param ClassId id of class to match against * @param feature character normalization feature * @param DebugMatch controls dump of debug info * * Globals: * #NormProtos character normalization prototypes * * @return Best match rating for Feature against protos of ClassId. */ float Classify::ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch) { if (ClassId >= NormProtos->NumProtos) { ClassId = NO_CLASS; } /* handle requests for classification as noise */ if (ClassId == NO_CLASS) { /* kludge - clean up constants and make into control knobs later */ float Match = (feature.Params[CharNormLength] * feature.Params[CharNormLength] * 500.0f + feature.Params[CharNormRx] * feature.Params[CharNormRx] * 8000.0f + feature.Params[CharNormRy] * feature.Params[CharNormRy] * 8000.0f); return (1 - NormEvidenceOf(Match)); } if (DebugMatch) { tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId)); } LIST Protos = NormProtos->Protos[ClassId]; if (Protos == nullptr) { // Avoid FP overflow in NormEvidenceOf. return 1.0f; } float BestMatch = FLT_MAX; iterate(Protos) { auto Proto = reinterpret_cast<PROTOTYPE *>(Protos->first_node()); float Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; float Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY]; if (DebugMatch) { tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", Proto->Mean[CharNormY], Delta, Proto->Weight.Elliptical[CharNormY], Match); } Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx]; Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx]; if (DebugMatch) { tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", Proto->Mean[CharNormRx], Delta, Proto->Weight.Elliptical[CharNormRx], Match); } // Ry is width! See intfx.cpp. Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy]; if (DebugMatch) { tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", Proto->Mean[CharNormRy], Delta, Proto->Weight.Elliptical[CharNormRy]); } Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy]; Delta *= kWidthErrorWeighting; Match += Delta; if (DebugMatch) { tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", Match, Match / classify_norm_adj_midpoint, NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match))); } if (Match < BestMatch) { BestMatch = Match; } } return 1 - NormEvidenceOf(BestMatch); } /* ComputeNormMatch */ void Classify::FreeNormProtos() { if (NormProtos != nullptr) { for (int i = 0; i < NormProtos->NumProtos; i++) { FreeProtoList(&NormProtos->Protos[i]); } delete[] NormProtos->ParamDesc; delete NormProtos; NormProtos = nullptr; } } /** * This routine allocates a new data structure to hold * a set of character normalization protos. It then fills in * the data structure by reading from the specified File. * @param fp open text file to read normalization protos from * Globals: none * @return Character normalization protos. */ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { char unichar[2 * UNICHAR_LEN + 1]; UNICHAR_ID unichar_id; LIST Protos; int NumProtos; /* allocate and initialization data structure */ auto NormProtos = new NORM_PROTOS(unicharset.size()); /* read file header and save in data structure */ NormProtos->NumParams = ReadSampleSize(fp); NormProtos->ParamDesc = ReadParamDesc(fp, NormProtos->NumParams); /* read protos for each class into a separate list */ const int kMaxLineSize = 100; char line[kMaxLineSize]; while (fp->FGets(line, kMaxLineSize) != nullptr) { std::istringstream stream(line); stream.imbue(std::locale::classic()); stream >> unichar >> NumProtos; if (stream.fail()) { continue; } if (unicharset.contains_unichar(unichar)) { unichar_id = unicharset.unichar_to_id(unichar); Protos = NormProtos->Protos[unichar_id]; for (int i = 0; i < NumProtos; i++) { Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams)); } NormProtos->Protos[unichar_id] = Protos; } else { tprintf("Error: unichar %s in normproto file is not in unichar set.\n", unichar); for (int i = 0; i < NumProtos; i++) { FreePrototype(ReadPrototype(fp, NormProtos->NumParams)); } } } return NormProtos; } /* ReadNormProtos */ } // namespace tesseract
