Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/classify/mfoutline.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/classify/mfoutline.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,417 @@ +/****************************************************************************** + ** Filename: mfoutline.c + ** Purpose: Interface to outline struct used for extracting features + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "mfoutline.h" + +#include "blobs.h" +#include "classify.h" +#include "clusttool.h" //If remove you get caught in a loop somewhere +#include "mfx.h" +#include "params.h" + +#include <cmath> +#include <cstdio> + +namespace tesseract { + +/*---------------------------------------------------------------------------*/ +/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). + */ +LIST ConvertBlob(TBLOB *blob) { + LIST outlines = NIL_LIST; + return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer); +} + +/*---------------------------------------------------------------------------*/ +/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */ +MFOUTLINE ConvertOutline(TESSLINE *outline) { + auto MFOutline = NIL_LIST; + + if (outline == nullptr || outline->loop == nullptr) { + return MFOutline; + } + + auto StartPoint = outline->loop; + auto EdgePoint = StartPoint; + do { + auto NextPoint = EdgePoint->next; + + /* filter out duplicate points */ + if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) { + auto NewPoint = new MFEDGEPT; + NewPoint->ClearMark(); + NewPoint->Hidden = EdgePoint->IsHidden(); + NewPoint->Point.x = EdgePoint->pos.x; + NewPoint->Point.y = EdgePoint->pos.y; + MFOutline = push(MFOutline, NewPoint); + } + EdgePoint = NextPoint; + } while (EdgePoint != StartPoint); + + if (MFOutline != nullptr) { + MakeOutlineCircular(MFOutline); + } + return MFOutline; +} + +/*---------------------------------------------------------------------------*/ +/** + * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). + * + * @param outline first outline to be converted + * @param mf_outlines list to add converted outlines to + * @param outline_type are the outlines outer or holes? + */ +LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type) { + MFOUTLINE mf_outline; + + while (outline != nullptr) { + mf_outline = ConvertOutline(outline); + if (mf_outline != nullptr) { + mf_outlines = push(mf_outlines, mf_outline); + } + outline = outline->next; + } + return mf_outlines; +} + +/*---------------------------------------------------------------------------*/ +/** + * This routine searches through the specified outline, computes + * a slope for each vector in the outline, and marks each + * vector as having one of the following directions: + * N, S, E, W, NE, NW, SE, SW + * This information is then stored in the outline and the + * outline is returned. + * @param Outline micro-feature outline to analyze + * @param MinSlope controls "snapping" of segments to horizontal + * @param MaxSlope controls "snapping" of segments to vertical + */ +void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope) { + MFEDGEPT *Current; + MFEDGEPT *Last; + MFOUTLINE EdgePoint; + + if (DegenerateOutline(Outline)) { + return; + } + + Last = PointAt(Outline); + Outline = NextPointAfter(Outline); + EdgePoint = Outline; + do { + Current = PointAt(EdgePoint); + ComputeDirection(Last, Current, MinSlope, MaxSlope); + + Last = Current; + EdgePoint = NextPointAfter(EdgePoint); + } while (EdgePoint != Outline); + +} /* FindDirectionChanges */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine deallocates all of the memory consumed by + * a micro-feature outline. + * @param arg micro-feature outline to be freed + */ +void FreeMFOutline(void *arg) { // MFOUTLINE Outline) + auto Outline = static_cast<MFOUTLINE>(arg); + + /* break the circular outline so we can use std. techniques to deallocate */ + MFOUTLINE Start = Outline->list_rest(); + set_rest(Outline, NIL_LIST); + while (Start != nullptr) { + delete reinterpret_cast<MFEDGEPT *>(Start->first_node()); + Start = pop(Start); + } + +} /* FreeMFOutline */ + +/*---------------------------------------------------------------------------*/ +/** + * Release all memory consumed by the specified list + * of outlines. + * @param Outlines list of mf-outlines to be freed + */ +void FreeOutlines(LIST Outlines) { + destroy_nodes(Outlines, FreeMFOutline); +} /* FreeOutlines */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine searches through the specified outline and finds + * the points at which the outline changes direction. These + * points are then marked as "extremities". This routine is + * used as an alternative to FindExtremities(). It forces the + * endpoints of the microfeatures to be at the direction + * changes rather than at the midpoint between direction + * changes. + * @param Outline micro-feature outline to analyze + */ +void MarkDirectionChanges(MFOUTLINE Outline) { + MFOUTLINE Current; + MFOUTLINE Last; + MFOUTLINE First; + + if (DegenerateOutline(Outline)) { + return; + } + + First = NextDirectionChange(Outline); + Last = First; + do { + Current = NextDirectionChange(Last); + PointAt(Current)->MarkPoint(); + Last = Current; + } while (Last != First); + +} /* MarkDirectionChanges */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine returns the next point in the micro-feature + * outline that is an extremity. The search starts after + * EdgePoint. The routine assumes that the outline being + * searched is not a degenerate outline (i.e. it must have + * 2 or more edge points). + * @param EdgePoint start search from this point + * @return Next extremity in the outline after EdgePoint. + * @note Globals: none + */ +MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { + EdgePoint = NextPointAfter(EdgePoint); + while (!PointAt(EdgePoint)->ExtremityMark) { + EdgePoint = NextPointAfter(EdgePoint); + } + + return (EdgePoint); + +} /* NextExtremity */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine normalizes the coordinates of the specified + * outline so that the outline is deskewed down to the + * baseline, translated so that x=0 is at XOrigin, and scaled + * so that the height of a character cell from descender to + * ascender is 1. Of this height, 0.25 is for the descender, + * 0.25 for the ascender, and 0.5 for the x-height. The + * y coordinate of the baseline is 0. + * @param Outline outline to be normalized + * @param XOrigin x-origin of text + */ +void NormalizeOutline(MFOUTLINE Outline, float XOrigin) { + if (Outline == NIL_LIST) { + return; + } + + MFOUTLINE EdgePoint = Outline; + do { + MFEDGEPT *Current = PointAt(EdgePoint); + Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset); + Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); + EdgePoint = NextPointAfter(EdgePoint); + } while (EdgePoint != Outline); +} /* NormalizeOutline */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine normalizes every outline in Outlines + * according to the currently selected normalization method. + * It also returns the scale factors that it used to do this + * scaling. The scale factors returned represent the x and + * y sizes in the normalized coordinate system that correspond + * to 1 pixel in the original coordinate system. + * Outlines are changed and XScale and YScale are updated. + * + * Globals: + * - classify_norm_method method being used for normalization + * - classify_char_norm_range map radius of gyration to this value + * @param Outlines list of outlines to be normalized + * @param XScale x-direction scale factor used by routine + * @param YScale y-direction scale factor used by routine + */ +void Classify::NormalizeOutlines(LIST Outlines, float *XScale, float *YScale) { + MFOUTLINE Outline; + + switch (classify_norm_method) { + case character: + ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); + break; + + case baseline: + iterate(Outlines) { + Outline = static_cast<MFOUTLINE>(Outlines->first_node()); + NormalizeOutline(Outline, 0.0); + } + *XScale = *YScale = MF_SCALE_FACTOR; + break; + } +} /* NormalizeOutlines */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * Change the direction of every vector in the specified + * outline segment to Direction. The segment to be changed + * starts at Start and ends at End. Note that the previous + * direction of End must also be changed to reflect the + * change in direction of the point before it. + * @param Start defines start of segment of outline to be modified + * @param End defines end of segment of outline to be modified + * @param Direction new direction to assign to segment + */ +void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { + MFOUTLINE Current; + + for (Current = Start; Current != End; Current = NextPointAfter(Current)) { + PointAt(Current)->Direction = Direction; + } + + PointAt(End)->PreviousDirection = Direction; + +} /* ChangeDirection */ + +/** + * This routine normalizes each point in Outline by + * translating it to the specified center and scaling it + * anisotropically according to the given scale factors. + * @param Outline outline to be character normalized + * @param cn_denorm + */ +void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm) { + MFOUTLINE First, Current; + MFEDGEPT *CurrentPoint; + + if (Outline == NIL_LIST) { + return; + } + + First = Outline; + Current = First; + do { + CurrentPoint = PointAt(Current); + FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); + cn_denorm.LocalNormTransform(pos, &pos); + CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR; + CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; + + Current = NextPointAfter(Current); + } while (Current != First); + +} /* CharNormalizeOutline */ + +/** + * This routine computes the slope from Start to Finish and + * and then computes the approximate direction of the line + * segment from Start to Finish. The direction is quantized + * into 8 buckets: + * N, S, E, W, NE, NW, SE, SW + * Both the slope and the direction are then stored into + * the appropriate fields of the Start edge point. The + * direction is also stored into the PreviousDirection field + * of the Finish edge point. + * @param Start starting point to compute direction from + * @param Finish finishing point to compute direction to + * @param MinSlope slope below which lines are horizontal + * @param MaxSlope slope above which lines are vertical + */ +void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope) { + FVECTOR Delta; + + Delta.x = Finish->Point.x - Start->Point.x; + Delta.y = Finish->Point.y - Start->Point.y; + if (Delta.x == 0) { + if (Delta.y < 0) { + Start->Slope = -FLT_MAX; + Start->Direction = south; + } else { + Start->Slope = FLT_MAX; + Start->Direction = north; + } + } else { + Start->Slope = Delta.y / Delta.x; + if (Delta.x > 0) { + if (Delta.y > 0) { + if (Start->Slope > MinSlope) { + if (Start->Slope < MaxSlope) { + Start->Direction = northeast; + } else { + Start->Direction = north; + } + } else { + Start->Direction = east; + } + } else if (Start->Slope < -MinSlope) { + if (Start->Slope > -MaxSlope) { + Start->Direction = southeast; + } else { + Start->Direction = south; + } + } else { + Start->Direction = east; + } + } else if (Delta.y > 0) { + if (Start->Slope < -MinSlope) { + if (Start->Slope > -MaxSlope) { + Start->Direction = northwest; + } else { + Start->Direction = north; + } + } else { + Start->Direction = west; + } + } else if (Start->Slope > MinSlope) { + if (Start->Slope < MaxSlope) { + Start->Direction = southwest; + } else { + Start->Direction = south; + } + } else { + Start->Direction = west; + } + } + Finish->PreviousDirection = Start->Direction; +} + +/** + * This routine returns the next point in the micro-feature + * outline that has a direction different than EdgePoint. The + * routine assumes that the outline being searched is not a + * degenerate outline (i.e. it must have 2 or more edge points). + * @param EdgePoint start search from this point + * @return Point of next direction change in micro-feature outline. + * @note Globals: none + */ +MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { + DIRECTION InitialDirection; + + InitialDirection = PointAt(EdgePoint)->Direction; + + MFOUTLINE next_pt = nullptr; + do { + EdgePoint = NextPointAfter(EdgePoint); + next_pt = NextPointAfter(EdgePoint); + } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden && + next_pt != nullptr && !PointAt(next_pt)->Hidden); + + return (EdgePoint); +} + +} // namespace tesseract
