view mupdf-source/thirdparty/tesseract/src/classify/mfoutline.cpp @ 39:a6bc019ac0b2 upstream

ADD: PyMuPDF v1.26.5: the original sdist.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:19:58 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

/******************************************************************************
 ** Filename:    mfoutline.c
 ** Purpose:     Interface to outline struct used for extracting features
 ** Author:      Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#include "mfoutline.h"

#include "blobs.h"
#include "classify.h"
#include "clusttool.h" //If remove you get caught in a loop somewhere
#include "mfx.h"
#include "params.h"

#include <cmath>
#include <cstdio>

namespace tesseract {

/*---------------------------------------------------------------------------*/
/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
 */
LIST ConvertBlob(TBLOB *blob) {
  LIST outlines = NIL_LIST;
  return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer);
}

/*---------------------------------------------------------------------------*/
/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */
MFOUTLINE ConvertOutline(TESSLINE *outline) {
  auto MFOutline = NIL_LIST;

  if (outline == nullptr || outline->loop == nullptr) {
    return MFOutline;
  }

  auto StartPoint = outline->loop;
  auto EdgePoint = StartPoint;
  do {
    auto NextPoint = EdgePoint->next;

    /* filter out duplicate points */
    if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) {
      auto NewPoint = new MFEDGEPT;
      NewPoint->ClearMark();
      NewPoint->Hidden = EdgePoint->IsHidden();
      NewPoint->Point.x = EdgePoint->pos.x;
      NewPoint->Point.y = EdgePoint->pos.y;
      MFOutline = push(MFOutline, NewPoint);
    }
    EdgePoint = NextPoint;
  } while (EdgePoint != StartPoint);

  if (MFOutline != nullptr) {
    MakeOutlineCircular(MFOutline);
  }
  return MFOutline;
}

/*---------------------------------------------------------------------------*/
/**
 * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
 *
 * @param outline      first outline to be converted
 * @param mf_outlines  list to add converted outlines to
 * @param outline_type  are the outlines outer or holes?
 */
LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type) {
  MFOUTLINE mf_outline;

  while (outline != nullptr) {
    mf_outline = ConvertOutline(outline);
    if (mf_outline != nullptr) {
      mf_outlines = push(mf_outlines, mf_outline);
    }
    outline = outline->next;
  }
  return mf_outlines;
}

/*---------------------------------------------------------------------------*/
/**
 * This routine searches through the specified outline, computes
 * a slope for each vector in the outline, and marks each
 * vector as having one of the following directions:
 *   N, S, E, W, NE, NW, SE, SW
 * This information is then stored in the outline and the
 * outline is returned.
 * @param Outline   micro-feature outline to analyze
 * @param MinSlope  controls "snapping" of segments to horizontal
 * @param MaxSlope  controls "snapping" of segments to vertical
 */
void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope) {
  MFEDGEPT *Current;
  MFEDGEPT *Last;
  MFOUTLINE EdgePoint;

  if (DegenerateOutline(Outline)) {
    return;
  }

  Last = PointAt(Outline);
  Outline = NextPointAfter(Outline);
  EdgePoint = Outline;
  do {
    Current = PointAt(EdgePoint);
    ComputeDirection(Last, Current, MinSlope, MaxSlope);

    Last = Current;
    EdgePoint = NextPointAfter(EdgePoint);
  } while (EdgePoint != Outline);

} /* FindDirectionChanges */

/*---------------------------------------------------------------------------*/
/**
 * This routine deallocates all of the memory consumed by
 * a micro-feature outline.
 * @param arg   micro-feature outline to be freed
 */
void FreeMFOutline(void *arg) { // MFOUTLINE Outline)
  auto Outline = static_cast<MFOUTLINE>(arg);

  /* break the circular outline so we can use std. techniques to deallocate */
  MFOUTLINE Start = Outline->list_rest();
  set_rest(Outline, NIL_LIST);
  while (Start != nullptr) {
    delete reinterpret_cast<MFEDGEPT *>(Start->first_node());
    Start = pop(Start);
  }

} /* FreeMFOutline */

/*---------------------------------------------------------------------------*/
/**
 * Release all memory consumed by the specified list
 * of outlines.
 * @param Outlines  list of mf-outlines to be freed
 */
void FreeOutlines(LIST Outlines) {
  destroy_nodes(Outlines, FreeMFOutline);
} /* FreeOutlines */

/*---------------------------------------------------------------------------*/
/**
 * This routine searches through the specified outline and finds
 * the points at which the outline changes direction.  These
 * points are then marked as "extremities".  This routine is
 * used as an alternative to FindExtremities().  It forces the
 * endpoints of the microfeatures to be at the direction
 * changes rather than at the midpoint between direction
 * changes.
 * @param Outline   micro-feature outline to analyze
 */
void MarkDirectionChanges(MFOUTLINE Outline) {
  MFOUTLINE Current;
  MFOUTLINE Last;
  MFOUTLINE First;

  if (DegenerateOutline(Outline)) {
    return;
  }

  First = NextDirectionChange(Outline);
  Last = First;
  do {
    Current = NextDirectionChange(Last);
    PointAt(Current)->MarkPoint();
    Last = Current;
  } while (Last != First);

} /* MarkDirectionChanges */

/*---------------------------------------------------------------------------*/
/**
 * This routine returns the next point in the micro-feature
 * outline that is an extremity.  The search starts after
 * EdgePoint.  The routine assumes that the outline being
 * searched is not a degenerate outline (i.e. it must have
 * 2 or more edge points).
 * @param EdgePoint start search from this point
 * @return Next extremity in the outline after EdgePoint.
 * @note Globals: none
 */
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
  EdgePoint = NextPointAfter(EdgePoint);
  while (!PointAt(EdgePoint)->ExtremityMark) {
    EdgePoint = NextPointAfter(EdgePoint);
  }

  return (EdgePoint);

} /* NextExtremity */

/*---------------------------------------------------------------------------*/
/**
 * This routine normalizes the coordinates of the specified
 * outline so that the outline is deskewed down to the
 * baseline, translated so that x=0 is at XOrigin, and scaled
 * so that the height of a character cell from descender to
 * ascender is 1.  Of this height, 0.25 is for the descender,
 * 0.25 for the ascender, and 0.5 for the x-height.  The
 * y coordinate of the baseline is 0.
 * @param Outline   outline to be normalized
 * @param XOrigin   x-origin of text
 */
void NormalizeOutline(MFOUTLINE Outline, float XOrigin) {
  if (Outline == NIL_LIST) {
    return;
  }

  MFOUTLINE EdgePoint = Outline;
  do {
    MFEDGEPT *Current = PointAt(EdgePoint);
    Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset);
    Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
    EdgePoint = NextPointAfter(EdgePoint);
  } while (EdgePoint != Outline);
} /* NormalizeOutline */

/*---------------------------------------------------------------------------*/
/**
 * This routine normalizes every outline in Outlines
 * according to the currently selected normalization method.
 * It also returns the scale factors that it used to do this
 * scaling.  The scale factors returned represent the x and
 * y sizes in the normalized coordinate system that correspond
 * to 1 pixel in the original coordinate system.
 * Outlines are changed and XScale and YScale are updated.
 *
 * Globals:
 * - classify_norm_method  method being used for normalization
 * - classify_char_norm_range map radius of gyration to this value
 * @param Outlines  list of outlines to be normalized
 * @param XScale    x-direction scale factor used by routine
 * @param YScale    y-direction scale factor used by routine
 */
void Classify::NormalizeOutlines(LIST Outlines, float *XScale, float *YScale) {
  MFOUTLINE Outline;

  switch (classify_norm_method) {
    case character:
      ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
      break;

    case baseline:
      iterate(Outlines) {
        Outline = static_cast<MFOUTLINE>(Outlines->first_node());
        NormalizeOutline(Outline, 0.0);
      }
      *XScale = *YScale = MF_SCALE_FACTOR;
      break;
  }
} /* NormalizeOutlines */

/*----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------*/
/**
 * Change the direction of every vector in the specified
 * outline segment to Direction.  The segment to be changed
 * starts at Start and ends at End.  Note that the previous
 * direction of End must also be changed to reflect the
 * change in direction of the point before it.
 * @param Start defines start of segment of outline to be modified
 * @param End defines end of segment of outline to be modified
 * @param Direction new direction to assign to segment
 */
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
  MFOUTLINE Current;

  for (Current = Start; Current != End; Current = NextPointAfter(Current)) {
    PointAt(Current)->Direction = Direction;
  }

  PointAt(End)->PreviousDirection = Direction;

} /* ChangeDirection */

/**
 * This routine normalizes each point in Outline by
 * translating it to the specified center and scaling it
 * anisotropically according to the given scale factors.
 * @param Outline     outline to be character normalized
 * @param cn_denorm
 */
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm) {
  MFOUTLINE First, Current;
  MFEDGEPT *CurrentPoint;

  if (Outline == NIL_LIST) {
    return;
  }

  First = Outline;
  Current = First;
  do {
    CurrentPoint = PointAt(Current);
    FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y);
    cn_denorm.LocalNormTransform(pos, &pos);
    CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
    CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR;

    Current = NextPointAfter(Current);
  } while (Current != First);

} /* CharNormalizeOutline */

/**
 * This routine computes the slope from Start to Finish and
 * and then computes the approximate direction of the line
 * segment from Start to Finish.  The direction is quantized
 * into 8 buckets:
 *  N, S, E, W, NE, NW, SE, SW
 * Both the slope and the direction are then stored into
 * the appropriate fields of the Start edge point.  The
 * direction is also stored into the PreviousDirection field
 * of the Finish edge point.
 * @param Start   starting point to compute direction from
 * @param Finish    finishing point to compute direction to
 * @param MinSlope  slope below which lines are horizontal
 * @param MaxSlope  slope above which lines are vertical
 */
void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope) {
  FVECTOR Delta;

  Delta.x = Finish->Point.x - Start->Point.x;
  Delta.y = Finish->Point.y - Start->Point.y;
  if (Delta.x == 0) {
    if (Delta.y < 0) {
      Start->Slope = -FLT_MAX;
      Start->Direction = south;
    } else {
      Start->Slope = FLT_MAX;
      Start->Direction = north;
    }
  } else {
    Start->Slope = Delta.y / Delta.x;
    if (Delta.x > 0) {
      if (Delta.y > 0) {
        if (Start->Slope > MinSlope) {
          if (Start->Slope < MaxSlope) {
            Start->Direction = northeast;
          } else {
            Start->Direction = north;
          }
        } else {
          Start->Direction = east;
        }
      } else if (Start->Slope < -MinSlope) {
        if (Start->Slope > -MaxSlope) {
          Start->Direction = southeast;
        } else {
          Start->Direction = south;
        }
      } else {
        Start->Direction = east;
      }
    } else if (Delta.y > 0) {
      if (Start->Slope < -MinSlope) {
        if (Start->Slope > -MaxSlope) {
          Start->Direction = northwest;
        } else {
          Start->Direction = north;
        }
      } else {
        Start->Direction = west;
      }
    } else if (Start->Slope > MinSlope) {
      if (Start->Slope < MaxSlope) {
        Start->Direction = southwest;
      } else {
        Start->Direction = south;
      }
    } else {
      Start->Direction = west;
    }
  }
  Finish->PreviousDirection = Start->Direction;
}

/**
 * This routine returns the next point in the micro-feature
 * outline that has a direction different than EdgePoint.  The
 * routine assumes that the outline being searched is not a
 * degenerate outline (i.e. it must have 2 or more edge points).
 * @param EdgePoint start search from this point
 * @return Point of next direction change in micro-feature outline.
 * @note Globals: none
 */
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
  DIRECTION InitialDirection;

  InitialDirection = PointAt(EdgePoint)->Direction;

  MFOUTLINE next_pt = nullptr;
  do {
    EdgePoint = NextPointAfter(EdgePoint);
    next_pt = NextPointAfter(EdgePoint);
  } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden &&
           next_pt != nullptr && !PointAt(next_pt)->Hidden);

  return (EdgePoint);
}

} // namespace tesseract