Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/compare.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/compare.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,3650 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file compare.c + * <pre> + * + * Test for pix equality + * l_int32 pixEqual() + * l_int32 pixEqualWithAlpha() + * l_int32 pixEqualWithCmap() + * l_int32 cmapEqual() + * l_int32 pixUsesCmapColor() + * + * Binary correlation + * l_int32 pixCorrelationBinary() + * + * Difference of two images of same size + * l_int32 pixDisplayDiff() + * l_int32 pixDisplayDiffBinary() + * l_int32 pixCompareBinary() + * l_int32 pixCompareGrayOrRGB() + * l_int32 pixCompareGray() + * l_int32 pixCompareRGB() + * l_int32 pixCompareTiled() + * + * Other measures of the difference of two images of the same size + * NUMA *pixCompareRankDifference() + * l_int32 pixTestForSimilarity() + * l_int32 pixGetDifferenceStats() + * NUMA *pixGetDifferenceHistogram() + * l_int32 pixGetPerceptualDiff() + * l_int32 pixGetPSNR() + * + * Comparison of photo regions by histogram + * l_int32 pixaComparePhotoRegionsByHisto() -- top-level + * l_int32 pixComparePhotoRegionsByHisto() -- top-level for 2 + * l_int32 pixGenPhotoHistos() + * PIX *pixPadToCenterCentroid() + * l_int32 pixCentroid8() + * l_int32 pixDecideIfPhotoImage() + * static l_int32 findHistoGridDimensions() + * l_int32 compareTilesByHisto() + * + * l_int32 pixCompareGrayByHisto() -- top-level for 2 + * static l_int32 pixCompareTilesByHisto() + * l_int32 pixCropAlignedToCentroid() + * + * l_uint8 *l_compressGrayHistograms() + * NUMAA *l_uncompressGrayHistograms() + * + * Translated images at the same resolution + * l_int32 pixCompareWithTranslation() + * l_int32 pixBestCorrelation() + * + * For comparing images using tiled histograms, essentially all the + * computation goes into deciding if a region of an image is a photo, + * whether that photo region is amenable to similarity measurements + * using histograms, and finally the calculation of the gray histograms + * for each of the tiled regions. The actual comparison is essentially + * instantaneous. Therefore, with a large number of images to compare + * with each other, it is important to first calculate the histograms + * for each image. Then the comparisons, which go as the square of the + * number of images, actually takes no time. + * + * A high level function that takes a pixa of images and does + * all comparisons, pixaComparePhotosByHisto(), uses this split + * approach. It pads the images so that the centroid is in the center, + * which will allow the tiles to be better aligned. + * + * For testing purposes, two functions are given that do all the work + * to compare just two photo regions: + * * pixComparePhotoRegionsByHisto() uses the split approach, qualifying + * the images first with pixGenPhotoHistos(), and then comparing + * with compareTilesByHisto(). + * * pixCompareGrayByHisto() aligns the two images by centroid + * and calls pixCompareTilesByHisto() to generate the histograms + * and do the comparison. + * + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <string.h> +#include <math.h> +#include "allheaders.h" + + /* Small enough to consider equal to 0.0, for plot output */ +static const l_float32 TINY = 0.00001f; + +static l_ok findHistoGridDimensions(l_int32 n, l_int32 w, l_int32 h, + l_int32 *pnx, l_int32 *pny, l_int32 debug); +static l_ok pixCompareTilesByHisto(PIX *pix1, PIX *pix2, l_int32 maxgray, + l_int32 factor, l_int32 n, + l_float32 *pscore, PIXA *pixadebug); + +/*------------------------------------------------------------------* + * Test for pix equality * + *------------------------------------------------------------------*/ +/*! + * \brief pixEqual() + * + * \param[in] pix1 + * \param[in] pix2 + * \param[out] psame 1 if same; 0 if different + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) Equality is defined as having the same pixel values for + * each respective image pixel. + * (2) This works on two pix of any depth. If one or both pix + * have a colormap, the depths can be different and the + * two pix can still be equal. + * (3) This ignores the alpha component for 32 bpp images. + * (4) If both pix have colormaps and the depths are equal, + * use the pixEqualWithCmap() function, which does a fast + * comparison if the colormaps are identical and a relatively + * slow comparison otherwise. + * (5) In all other cases, any existing colormaps must first be + * removed before doing pixel comparison. After the colormaps + * are removed, the resulting two images must have the same depth. + * The "lowest common denominator" is RGB, but this is only + * chosen when necessary, or when both have colormaps but + * different depths. + * (6) For images without colormaps that are not 32 bpp, all bits + * in the image part of the data array must be identical. + * </pre> + */ +l_ok +pixEqual(PIX *pix1, + PIX *pix2, + l_int32 *psame) +{ + return pixEqualWithAlpha(pix1, pix2, 0, psame); +} + + +/*! + * \brief pixEqualWithAlpha() + * + * \param[in] pix1 + * \param[in] pix2 + * \param[in] use_alpha 1 to compare alpha in RGBA; 0 to ignore + * \param[out] psame 1 if same; 0 if different + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) See notes in pixEqual(). + * (2) This is more general than pixEqual(), in that for 32 bpp + * RGBA images, where spp = 4, you can optionally include + * the alpha component in the comparison. + * </pre> + */ +l_ok +pixEqualWithAlpha(PIX *pix1, + PIX *pix2, + l_int32 use_alpha, + l_int32 *psame) +{ +l_int32 w1, h1, d1, w2, h2, d2, wpl1, wpl2; +l_int32 spp1, spp2, i, j, color, mismatch, opaque; +l_int32 fullwords, linebits, endbits; +l_uint32 endmask, wordmask; +l_uint32 *data1, *data2, *line1, *line2; +PIX *pixs1, *pixs2, *pixt1, *pixt2, *pixalpha; +PIXCMAP *cmap1, *cmap2; + + if (!psame) + return ERROR_INT("psame not defined", __func__, 1); + *psame = 0; /* init to not equal */ + if (!pix1 || !pix2) + return ERROR_INT("pix1 and pix2 not both defined", __func__, 1); + pixGetDimensions(pix1, &w1, &h1, &d1); + pixGetDimensions(pix2, &w2, &h2, &d2); + if (w1 != w2 || h1 != h2) { + L_INFO("pix sizes differ\n", __func__); + return 0; + } + + /* Suppose the use_alpha flag is true. + * If only one of two 32 bpp images has spp == 4, we call that + * a "mismatch" of the alpha component. In the case of a mismatch, + * if the 4 bpp pix does not have all alpha components opaque (255), + * the images are not-equal. However if they are all opaque, + * this image is equivalent to spp == 3, so we allow the + * comparison to go forward, testing only for the RGB equality. */ + spp1 = pixGetSpp(pix1); + spp2 = pixGetSpp(pix2); + mismatch = 0; + if (use_alpha && d1 == 32 && d2 == 32) { + mismatch = ((spp1 == 4 && spp2 != 4) || (spp1 != 4 && spp2 == 4)); + if (mismatch) { + pixalpha = (spp1 == 4) ? pix1 : pix2; + pixAlphaIsOpaque(pixalpha, &opaque); + if (!opaque) { + L_INFO("just one pix has a non-opaque alpha layer\n", __func__); + return 0; + } + } + } + + cmap1 = pixGetColormap(pix1); + cmap2 = pixGetColormap(pix2); + if (!cmap1 && !cmap2 && (d1 != d2) && (d1 == 32 || d2 == 32)) { + L_INFO("no colormaps, pix depths unequal, and one of them is RGB\n", + __func__); + return 0; + } + + if (cmap1 && cmap2 && (d1 == d2)) /* use special function */ + return pixEqualWithCmap(pix1, pix2, psame); + + /* Must remove colormaps if they exist, and in the process + * end up with the resulting images having the same depth. */ + if (cmap1 && !cmap2) { + pixUsesCmapColor(pix1, &color); + if (color && d2 <= 8) /* can't be equal */ + return 0; + if (d2 < 8) + pixs2 = pixConvertTo8(pix2, FALSE); + else + pixs2 = pixClone(pix2); + if (d2 <= 8) + pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_GRAYSCALE); + else + pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR); + } else if (!cmap1 && cmap2) { + pixUsesCmapColor(pix2, &color); + if (color && d1 <= 8) /* can't be equal */ + return 0; + if (d1 < 8) + pixs1 = pixConvertTo8(pix1, FALSE); + else + pixs1 = pixClone(pix1); + if (d1 <= 8) + pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_GRAYSCALE); + else + pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR); + } else if (cmap1 && cmap2) { /* depths not equal; use rgb */ + pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR); + pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR); + } else { /* no colormaps */ + pixs1 = pixClone(pix1); + pixs2 = pixClone(pix2); + } + + /* OK, we have no colormaps, but the depths may still be different */ + d1 = pixGetDepth(pixs1); + d2 = pixGetDepth(pixs2); + if (d1 != d2) { + if (d1 == 16 || d2 == 16) { + L_INFO("one pix is 16 bpp\n", __func__); + pixDestroy(&pixs1); + pixDestroy(&pixs2); + return 0; + } + pixt1 = pixConvertLossless(pixs1, 8); + pixt2 = pixConvertLossless(pixs2, 8); + if (!pixt1 || !pixt2) { + L_INFO("failure to convert to 8 bpp\n", __func__); + pixDestroy(&pixs1); + pixDestroy(&pixs2); + pixDestroy(&pixt1); + pixDestroy(&pixt2); + return 0; + } + } else { + pixt1 = pixClone(pixs1); + pixt2 = pixClone(pixs2); + } + pixDestroy(&pixs1); + pixDestroy(&pixs2); + + /* No colormaps, equal depths; do pixel comparisons */ + d1 = pixGetDepth(pixt1); + d2 = pixGetDepth(pixt2); + wpl1 = pixGetWpl(pixt1); + wpl2 = pixGetWpl(pixt2); + data1 = pixGetData(pixt1); + data2 = pixGetData(pixt2); + + if (d1 == 32) { /* test either RGB or RGBA pixels */ + if (use_alpha && !mismatch) + wordmask = (spp1 == 3) ? 0xffffff00 : 0xffffffff; + else + wordmask = 0xffffff00; + for (i = 0; i < h1; i++) { + line1 = data1 + wpl1 * i; + line2 = data2 + wpl2 * i; + for (j = 0; j < wpl1; j++) { + if ((*line1 ^ *line2) & wordmask) { + pixDestroy(&pixt1); + pixDestroy(&pixt2); + return 0; + } + line1++; + line2++; + } + } + } else { /* all bits count */ + linebits = d1 * w1; + fullwords = linebits / 32; + endbits = linebits & 31; + endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits)); + for (i = 0; i < h1; i++) { + line1 = data1 + wpl1 * i; + line2 = data2 + wpl2 * i; + for (j = 0; j < fullwords; j++) { + if (*line1 ^ *line2) { + pixDestroy(&pixt1); + pixDestroy(&pixt2); + return 0; + } + line1++; + line2++; + } + if (endbits) { + if ((*line1 ^ *line2) & endmask) { + pixDestroy(&pixt1); + pixDestroy(&pixt2); + return 0; + } + } + } + } + + pixDestroy(&pixt1); + pixDestroy(&pixt2); + *psame = 1; + return 0; +} + + +/*! + * \brief pixEqualWithCmap() + * + * \param[in] pix1 + * \param[in] pix2 + * \param[out] psame + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This returns same = TRUE if the images have identical content. + * (2) Both pix must have a colormap, and be of equal size and depth. + * If these conditions are not satisfied, it is not an error; + * the returned result is same = FALSE. + * (3) We then check whether the colormaps are the same; if so, + * the comparison proceeds 32 bits at a time. + * (4) If the colormaps are different, the comparison is done by + * slow brute force. + * </pre> + */ +l_ok +pixEqualWithCmap(PIX *pix1, + PIX *pix2, + l_int32 *psame) +{ +l_int32 d, w, h, wpl1, wpl2, i, j, linebits, fullwords, endbits; +l_int32 rval1, rval2, gval1, gval2, bval1, bval2, samecmaps; +l_uint32 endmask, val1, val2; +l_uint32 *data1, *data2, *line1, *line2; +PIXCMAP *cmap1, *cmap2; + + if (!psame) + return ERROR_INT("&same not defined", __func__, 1); + *psame = 0; + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + + if (pixSizesEqual(pix1, pix2) == 0) + return 0; + cmap1 = pixGetColormap(pix1); + cmap2 = pixGetColormap(pix2); + if (!cmap1 || !cmap2) { + L_INFO("both images don't have colormap\n", __func__); + return 0; + } + pixGetDimensions(pix1, &w, &h, &d); + if (d != 1 && d != 2 && d != 4 && d != 8) { + L_INFO("pix depth not in {1, 2, 4, 8}\n", __func__); + return 0; + } + + cmapEqual(cmap1, cmap2, 3, &samecmaps); + if (samecmaps == TRUE) { /* colormaps are identical; compare by words */ + linebits = d * w; + wpl1 = pixGetWpl(pix1); + wpl2 = pixGetWpl(pix2); + data1 = pixGetData(pix1); + data2 = pixGetData(pix2); + fullwords = linebits / 32; + endbits = linebits & 31; + endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits)); + for (i = 0; i < h; i++) { + line1 = data1 + wpl1 * i; + line2 = data2 + wpl2 * i; + for (j = 0; j < fullwords; j++) { + if (*line1 ^ *line2) + return 0; + line1++; + line2++; + } + if (endbits) { + if ((*line1 ^ *line2) & endmask) + return 0; + } + } + *psame = 1; + return 0; + } + + /* Colormaps aren't identical; compare pixel by pixel */ + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + pixGetPixel(pix1, j, i, &val1); + pixGetPixel(pix2, j, i, &val2); + pixcmapGetColor(cmap1, val1, &rval1, &gval1, &bval1); + pixcmapGetColor(cmap2, val2, &rval2, &gval2, &bval2); + if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2) + return 0; + } + } + + *psame = 1; + return 0; +} + + +/*! + * \brief cmapEqual() + * + * \param[in] cmap1 + * \param[in] cmap2 + * \param[in] ncomps 3 for RGB, 4 for RGBA + * \param[out] psame + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This returns %same = TRUE if the colormaps have identical entries. + * (2) If %ncomps == 4, the alpha components of the colormaps are also + * compared. + * </pre> + */ +l_ok +cmapEqual(PIXCMAP *cmap1, + PIXCMAP *cmap2, + l_int32 ncomps, + l_int32 *psame) +{ +l_int32 n1, n2, i, rval1, rval2, gval1, gval2, bval1, bval2, aval1, aval2; + + if (!psame) + return ERROR_INT("&same not defined", __func__, 1); + *psame = FALSE; + if (!cmap1) + return ERROR_INT("cmap1 not defined", __func__, 1); + if (!cmap2) + return ERROR_INT("cmap2 not defined", __func__, 1); + if (ncomps != 3 && ncomps != 4) + return ERROR_INT("ncomps not 3 or 4", __func__, 1); + + n1 = pixcmapGetCount(cmap1); + n2 = pixcmapGetCount(cmap2); + if (n1 != n2) { + L_INFO("colormap sizes are different\n", __func__); + return 0; + } + + for (i = 0; i < n1; i++) { + pixcmapGetRGBA(cmap1, i, &rval1, &gval1, &bval1, &aval1); + pixcmapGetRGBA(cmap2, i, &rval2, &gval2, &bval2, &aval2); + if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2) + return 0; + if (ncomps == 4 && aval1 != aval2) + return 0; + } + *psame = TRUE; + return 0; +} + + +/*! + * \brief pixUsesCmapColor() + * + * \param[in] pixs any depth, colormap + * \param[out] pcolor TRUE if color found + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This returns color = TRUE if three things are obtained: + * (a) the pix has a colormap + * (b) the colormap has at least one color entry + * (c) a color entry is actually used + * (2) It is used in pixEqual() for comparing two images, in a + * situation where it is required to know if the colormap + * has color entries that are actually used in the image. + * </pre> + */ +l_ok +pixUsesCmapColor(PIX *pixs, + l_int32 *pcolor) +{ +l_int32 n, i, rval, gval, bval, numpix; +NUMA *na; +PIXCMAP *cmap; + + if (!pcolor) + return ERROR_INT("&color not defined", __func__, 1); + *pcolor = 0; + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 1); + + if ((cmap = pixGetColormap(pixs)) == NULL) + return 0; + + pixcmapHasColor(cmap, pcolor); + if (*pcolor == 0) /* no color */ + return 0; + + /* The cmap has color entries. Are they used? */ + na = pixGetGrayHistogram(pixs, 1); + n = pixcmapGetCount(cmap); + for (i = 0; i < n; i++) { + pixcmapGetColor(cmap, i, &rval, &gval, &bval); + numaGetIValue(na, i, &numpix); + if ((rval != gval || rval != bval) && numpix) { /* color found! */ + *pcolor = 1; + break; + } + } + numaDestroy(&na); + + return 0; +} + + +/*------------------------------------------------------------------* + * Binary correlation * + *------------------------------------------------------------------*/ +/*! + * \brief pixCorrelationBinary() + * + * \param[in] pix1 1 bpp + * \param[in] pix2 1 bpp + * \param[out] pval correlation + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) The correlation is a number between 0.0 and 1.0, + * based on foreground similarity: + * (|1 AND 2|)**2 + * correlation = -------------- + * |1| * |2| + * where |x| is the count of foreground pixels in image x. + * If the images are identical, this is 1.0. + * If they have no fg pixels in common, this is 0.0. + * If one or both images have no fg pixels, the correlation is 0.0. + * (2) Typically the two images are of equal size, but this + * is not enforced. Instead, the UL corners are aligned. + * </pre> + */ +l_ok +pixCorrelationBinary(PIX *pix1, + PIX *pix2, + l_float32 *pval) +{ +l_int32 count1, count2, countn; +l_int32 *tab8; +PIX *pixn; + + if (!pval) + return ERROR_INT("&pval not defined", __func__, 1); + *pval = 0.0; + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + + tab8 = makePixelSumTab8(); + pixCountPixels(pix1, &count1, tab8); + pixCountPixels(pix2, &count2, tab8); + if (count1 == 0 || count2 == 0) { + LEPT_FREE(tab8); + return 0; + } + pixn = pixAnd(NULL, pix1, pix2); + pixCountPixels(pixn, &countn, tab8); + *pval = (l_float32)countn * (l_float32)countn / + ((l_float32)count1 * (l_float32)count2); + LEPT_FREE(tab8); + pixDestroy(&pixn); + return 0; +} + + +/*------------------------------------------------------------------* + * Difference of two images * + *------------------------------------------------------------------*/ +/*! + * \brief pixDisplayDiff() + * + * \param[in] pix1 any depth + * \param[in] pix2 any depth + * \param[in] showall 1 to display input images; 0 to only display result + * \param[in] mindiff min difference to identify pixel + * \param[in] diffcolor color of pixel indicating difference >= mindiff + * \return pixd 32 bpp rgb, or NULL on error + * + * <pre> + * Notes: + * (1) This aligns the UL corners of pix1 and pix2, crops to the + * overlapping pixels, and shows which pixels have a significant + * difference in value. + * (2) Requires %pix1 and %pix2 to have the same depth. + * (3) If rgb, a pixel is identified as different if any component + * values of the corresponding pixels equals or exceeds %mindiff. + * (4) %diffcolor is in format 0xrrggbbaa. + * (5) If %pix1 and %pix2 are 1 bpp, ignores %mindiff and %diffcolor, + * and uses the result of pixDisplayDiffBinary(). + * </pre> + */ +PIX * +pixDisplayDiff(PIX *pix1, + PIX *pix2, + l_int32 showall, + l_int32 mindiff, + l_uint32 diffcolor) +{ +l_int32 i, j, w1, h1, d1, w2, h2, d2, minw, minh, wpl1, wpl2, wpl3; +l_int32 rval1, gval1, bval1, rval2, gval2, bval2; +l_uint32 val1, val2; +l_uint32 *data1, *data2, *data3, *line1, *line2, *line3; +PIX *pix3 = NULL, *pix4 = NULL, *pixd; +PIXA *pixa1; + + if (!pix1 || !pix2) + return (PIX *)ERROR_PTR("pix1, pix2 not both defined", __func__, NULL); + pixGetDimensions(pix1, &w1, &h1, &d1); + pixGetDimensions(pix2, &w2, &h2, &d2); + if (d1 != d2) + return (PIX *)ERROR_PTR("unequal depths", __func__, NULL); + if (mindiff <= 0) + return (PIX *)ERROR_PTR("mindiff must be > 0", __func__, NULL); + + if (d1 == 1) { + pix3 = pixDisplayDiffBinary(pix1, pix2); + pixd = pixConvertTo32(pix3); + pixDestroy(&pix3); + } else { + minw = L_MIN(w1, w2); + minh = L_MIN(h1, h2); + pix3 = pixConvertTo32(pix1); + pix4 = pixConvertTo32(pix2); + pixd = pixCreate(minw, minh, 32); + pixRasterop(pixd, 0, 0, minw, minh, PIX_SRC, pix3, 0, 0); + data1 = pixGetData(pix3); + wpl1 = pixGetWpl(pix3); + data2 = pixGetData(pix4); + wpl2 = pixGetWpl(pix4); + data3 = pixGetData(pixd); + wpl3 = pixGetWpl(pixd); + for (i = 0; i < minh; i++) { + line1 = data1 + i * wpl1; + line2 = data2 + i * wpl2; + line3 = data3 + i * wpl3; + for (j = 0; j < minw; j++) { + val1 = GET_DATA_FOUR_BYTES(line1, j); + val2 = GET_DATA_FOUR_BYTES(line2, j); + extractRGBValues(val1, &rval1, &gval1, &bval1); + extractRGBValues(val2, &rval2, &gval2, &bval2); + if (L_ABS(rval1 - rval2) >= mindiff || + L_ABS(gval1 - gval2) >= mindiff || + L_ABS(bval1 - bval2) >= mindiff) + SET_DATA_FOUR_BYTES(line3, j, diffcolor); + } + } + } + + if (showall) { + pixa1 = pixaCreate(3); + if (d1 == 1) { + pixaAddPix(pixa1, pix1, L_COPY); + pixaAddPix(pixa1, pix2, L_COPY); + } else { + pixaAddPix(pixa1, pix3, L_INSERT); + pixaAddPix(pixa1, pix4, L_INSERT); + } + pixaAddPix(pixa1, pixd, L_INSERT); /* save diff image */ + pixd = pixaDisplayTiledInColumns(pixa1, 2, 1.0, 30, 2); /* all 3 */ + pixaDestroy(&pixa1); + } else if (d1 != 1) { + pixDestroy(&pix3); + pixDestroy(&pix4); + } + return pixd; +} + + +/*! + * \brief pixDisplayDiffBinary() + * + * \param[in] pix1 1 bpp + * \param[in] pix2 1 bpp + * \return pixd 4 bpp cmapped, or NULL on error + * + * <pre> + * Notes: + * (1) This gives a color representation of the difference between + * pix1 and pix2. The color difference depends on the order. + * The pixels in pixd have 4 colors: + * * unchanged: black (on), white (off) + * * on in pix1, off in pix2: red + * * on in pix2, off in pix1: green + * (2) This aligns the UL corners of pix1 and pix2, and crops + * to the overlapping pixels. + * </pre> + */ +PIX * +pixDisplayDiffBinary(PIX *pix1, + PIX *pix2) +{ +l_int32 w1, h1, d1, w2, h2, d2, minw, minh; +PIX *pixt, *pixd; +PIXCMAP *cmap; + + if (!pix1 || !pix2) + return (PIX *)ERROR_PTR("pix1, pix2 not both defined", __func__, NULL); + pixGetDimensions(pix1, &w1, &h1, &d1); + pixGetDimensions(pix2, &w2, &h2, &d2); + if (d1 != 1 || d2 != 1) + return (PIX *)ERROR_PTR("pix1 and pix2 not 1 bpp", __func__, NULL); + minw = L_MIN(w1, w2); + minh = L_MIN(h1, h2); + + pixd = pixCreate(minw, minh, 4); + cmap = pixcmapCreate(4); + pixcmapAddColor(cmap, 255, 255, 255); /* initialized to white */ + pixcmapAddColor(cmap, 0, 0, 0); + pixcmapAddColor(cmap, 255, 0, 0); + pixcmapAddColor(cmap, 0, 255, 0); + pixSetColormap(pixd, cmap); + + pixt = pixAnd(NULL, pix1, pix2); + pixPaintThroughMask(pixd, pixt, 0, 0, 0x0); /* black */ + pixSubtract(pixt, pix1, pix2); + pixPaintThroughMask(pixd, pixt, 0, 0, 0xff000000); /* red */ + pixSubtract(pixt, pix2, pix1); + pixPaintThroughMask(pixd, pixt, 0, 0, 0x00ff0000); /* green */ + pixDestroy(&pixt); + return pixd; +} + + +/*! + * \brief pixCompareBinary() + * + * \param[in] pix1 1 bpp + * \param[in] pix2 1 bpp + * \param[in] comptype L_COMPARE_XOR, L_COMPARE_SUBTRACT + * \param[out] pfract fraction of pixels that are different + * \param[out] ppixdiff [optional] pix of difference + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) The two images are aligned at the UL corner, and do not + * need to be the same size. + * (2) If using L_COMPARE_SUBTRACT, pix2 is subtracted from pix1. + * (3) The total number of pixels is determined by pix1. + * (4) On error, the returned fraction is 1.0. + * </pre> + */ +l_ok +pixCompareBinary(PIX *pix1, + PIX *pix2, + l_int32 comptype, + l_float32 *pfract, + PIX **ppixdiff) +{ +l_int32 w, h, count; +PIX *pixt; + + if (ppixdiff) *ppixdiff = NULL; + if (!pfract) + return ERROR_INT("&pfract not defined", __func__, 1); + *pfract = 1.0; /* initialize to max difference */ + if (!pix1 || pixGetDepth(pix1) != 1) + return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1); + if (!pix2 || pixGetDepth(pix2) != 1) + return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1); + if (comptype != L_COMPARE_XOR && comptype != L_COMPARE_SUBTRACT) + return ERROR_INT("invalid comptype", __func__, 1); + + if (comptype == L_COMPARE_XOR) + pixt = pixXor(NULL, pix1, pix2); + else /* comptype == L_COMPARE_SUBTRACT) */ + pixt = pixSubtract(NULL, pix1, pix2); + pixCountPixels(pixt, &count, NULL); + pixGetDimensions(pix1, &w, &h, NULL); + *pfract = (l_float32)(count) / (l_float32)(w * h); + + if (ppixdiff) + *ppixdiff = pixt; + else + pixDestroy(&pixt); + return 0; +} + + +/*! + * \brief pixCompareGrayOrRGB() + * + * \param[in] pix1 2,4,8,16 bpp gray, 32 bpp rgb, or colormapped + * \param[in] pix2 2,4,8,16 bpp gray, 32 bpp rgb, or colormapped + * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF + * \param[in] plottype gplot plot output type, or 0 for no plot + * \param[out] psame [optional] 1 if pixel values are identical + * \param[out] pdiff [optional] average difference + * \param[out] prmsdiff [optional] rms of difference + * \param[out] ppixdiff [optional] pix of difference + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) The two images are aligned at the UL corner, and do not + * need to be the same size. If they are not the same size, + * the comparison will be made over overlapping pixels. + * (2) If there is a colormap, it is removed and the result + * is either gray or RGB depending on the colormap. + * (3) If RGB, each component is compared separately. + * (4) If type is L_COMPARE_ABS_DIFF, pix2 is subtracted from pix1 + * and the absolute value is taken. + * (5) If type is L_COMPARE_SUBTRACT, pix2 is subtracted from pix1 + * and the result is clipped to 0. + * (6) The plot output types are specified in gplot.h. + * Use 0 if no difference plot is to be made. + * (7) If the images are pixelwise identical, no difference + * plot is made, even if requested. The result (TRUE or FALSE) + * is optionally returned in the parameter 'same'. + * (8) The average difference (either subtracting or absolute value) + * is optionally returned in the parameter 'diff'. + * (9) The RMS difference is optionally returned in the + * parameter 'rmsdiff'. For RGB, we return the average of + * the RMS differences for each of the components. + * (10) Because pixel values are compared, pix1 and pix2 can be equal when: + * * they are both gray with different depth + * * one is colormapped and the other is not + * * they are both colormapped and have different size colormaps + * </pre> + */ +l_ok +pixCompareGrayOrRGB(PIX *pix1, + PIX *pix2, + l_int32 comptype, + l_int32 plottype, + l_int32 *psame, + l_float32 *pdiff, + l_float32 *prmsdiff, + PIX **ppixdiff) +{ +l_int32 retval, d1, d2; +PIX *pixt1, *pixt2, *pixs1, *pixs2; + + if (psame) *psame = 0; + if (pdiff) *pdiff = 255.0; + if (prmsdiff) *prmsdiff = 255.0; + if (ppixdiff) *ppixdiff = NULL; + if (!pix1 || pixGetDepth(pix1) == 1) + return ERROR_INT("pix1 not defined or 1 bpp", __func__, 1); + if (!pix2 || pixGetDepth(pix2) == 1) + return ERROR_INT("pix2 not defined or 1 bpp", __func__, 1); + if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF) + return ERROR_INT("invalid comptype", __func__, 1); + if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS) + return ERROR_INT("invalid plottype", __func__, 1); + + pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); + pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC); + d1 = pixGetDepth(pixt1); + d2 = pixGetDepth(pixt2); + if (d1 < 8) + pixs1 = pixConvertTo8(pixt1, FALSE); + else + pixs1 = pixClone(pixt1); + if (d2 < 8) + pixs2 = pixConvertTo8(pixt2, FALSE); + else + pixs2 = pixClone(pixt2); + pixDestroy(&pixt1); + pixDestroy(&pixt2); + d1 = pixGetDepth(pixs1); + d2 = pixGetDepth(pixs2); + if (d1 != d2) { + pixDestroy(&pixs1); + pixDestroy(&pixs2); + return ERROR_INT("intrinsic depths are not equal", __func__, 1); + } + + if (d1 == 8 || d1 == 16) + retval = pixCompareGray(pixs1, pixs2, comptype, plottype, psame, + pdiff, prmsdiff, ppixdiff); + else /* d1 == 32 */ + retval = pixCompareRGB(pixs1, pixs2, comptype, plottype, psame, + pdiff, prmsdiff, ppixdiff); + pixDestroy(&pixs1); + pixDestroy(&pixs2); + return retval; +} + + +/*! + * \brief pixCompareGray() + * + * \param[in] pix1 8 or 16 bpp, not cmapped + * \param[in] pix2 8 or 16 bpp, not cmapped + * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF + * \param[in] plottype gplot plot output type, or 0 for no plot + * \param[out] psame [optional] 1 if pixel values are identical + * \param[out] pdiff [optional] average difference + * \param[out] prmsdiff [optional] rms of difference + * \param[out] ppixdiff [optional] pix of difference + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) See pixCompareGrayOrRGB() for details. + * (2) Use pixCompareGrayOrRGB() if the input pix are colormapped. + * (3) Note: setting %plottype > 0 can result in writing named + * output files. + * </pre> + */ +l_ok +pixCompareGray(PIX *pix1, + PIX *pix2, + l_int32 comptype, + l_int32 plottype, + l_int32 *psame, + l_float32 *pdiff, + l_float32 *prmsdiff, + PIX **ppixdiff) +{ +char buf[64]; +static l_atomic index = 0; +l_int32 d1, d2, same, first, last; +GPLOT *gplot; +NUMA *na, *nac; +PIX *pixt; + + if (psame) *psame = 0; + if (pdiff) *pdiff = 255.0; + if (prmsdiff) *prmsdiff = 255.0; + if (ppixdiff) *ppixdiff = NULL; + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + d1 = pixGetDepth(pix1); + d2 = pixGetDepth(pix2); + if ((d1 != d2) || (d1 != 8 && d1 != 16)) + return ERROR_INT("depths unequal or not 8 or 16 bpp", __func__, 1); + if (pixGetColormap(pix1) || pixGetColormap(pix2)) + return ERROR_INT("pix1 and/or pix2 are colormapped", __func__, 1); + if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF) + return ERROR_INT("invalid comptype", __func__, 1); + if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS) + return ERROR_INT("invalid plottype", __func__, 1); + + lept_mkdir("lept/comp"); + + if (comptype == L_COMPARE_SUBTRACT) + pixt = pixSubtractGray(NULL, pix1, pix2); + else /* comptype == L_COMPARE_ABS_DIFF) */ + pixt = pixAbsDifference(pix1, pix2); + + pixZero(pixt, &same); + if (same) + L_INFO("Images are pixel-wise identical\n", __func__); + if (psame) *psame = same; + + if (pdiff) + pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_MEAN_ABSVAL, pdiff); + + /* Don't bother to plot if the images are the same */ + if (plottype && !same) { + L_INFO("Images differ: output plots will be generated\n", __func__); + na = pixGetGrayHistogram(pixt, 1); + numaGetNonzeroRange(na, TINY, &first, &last); + nac = numaClipToInterval(na, 0, last); + snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d", index); + gplot = gplotCreate(buf, plottype, + "Pixel Difference Histogram", "diff val", + "number of pixels"); + gplotAddPlot(gplot, NULL, nac, GPLOT_LINES, "gray"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d.png", + index++); + l_fileDisplay(buf, 100, 100, 1.0); + numaDestroy(&na); + numaDestroy(&nac); + } + + if (ppixdiff) + *ppixdiff = pixCopy(NULL, pixt); + + if (prmsdiff) { + if (comptype == L_COMPARE_SUBTRACT) { /* wrong type for rms diff */ + pixDestroy(&pixt); + pixt = pixAbsDifference(pix1, pix2); + } + pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, prmsdiff); + } + + pixDestroy(&pixt); + return 0; +} + + +/*! + * \brief pixCompareRGB() + * + * \param[in] pix1 32 bpp rgb + * \param[in] pix2 32 bpp rgb + * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF + * \param[in] plottype gplot plot output type, or 0 for no plot + * \param[out] psame [optional] 1 if pixel values are identical + * \param[out] pdiff [optional] average difference + * \param[out] prmsdiff [optional] rms of difference + * \param[out] ppixdiff [optional] pix of difference + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) See pixCompareGrayOrRGB() for details. + * (2) Note: setting %plottype > 0 can result in writing named + * output files. + * </pre> + */ +l_ok +pixCompareRGB(PIX *pix1, + PIX *pix2, + l_int32 comptype, + l_int32 plottype, + l_int32 *psame, + l_float32 *pdiff, + l_float32 *prmsdiff, + PIX **ppixdiff) +{ +char buf[64]; +static l_atomic index = 0; +l_int32 rsame, gsame, bsame, same, first, rlast, glast, blast, last; +l_float32 rdiff, gdiff, bdiff; +GPLOT *gplot; +NUMA *nar, *nag, *nab, *narc, *nagc, *nabc; +PIX *pixr1, *pixr2, *pixg1, *pixg2, *pixb1, *pixb2; +PIX *pixr, *pixg, *pixb; + + if (psame) *psame = 0; + if (pdiff) *pdiff = 0.0; + if (prmsdiff) *prmsdiff = 0.0; + if (ppixdiff) *ppixdiff = NULL; + if (!pix1 || pixGetDepth(pix1) != 32) + return ERROR_INT("pix1 not defined or not 32 bpp", __func__, 1); + if (!pix2 || pixGetDepth(pix2) != 32) + return ERROR_INT("pix2 not defined or not ew bpp", __func__, 1); + if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF) + return ERROR_INT("invalid comptype", __func__, 1); + if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS) + return ERROR_INT("invalid plottype", __func__, 1); + + lept_mkdir("lept/comp"); + + pixr1 = pixGetRGBComponent(pix1, COLOR_RED); + pixr2 = pixGetRGBComponent(pix2, COLOR_RED); + pixg1 = pixGetRGBComponent(pix1, COLOR_GREEN); + pixg2 = pixGetRGBComponent(pix2, COLOR_GREEN); + pixb1 = pixGetRGBComponent(pix1, COLOR_BLUE); + pixb2 = pixGetRGBComponent(pix2, COLOR_BLUE); + if (comptype == L_COMPARE_SUBTRACT) { + pixr = pixSubtractGray(NULL, pixr1, pixr2); + pixg = pixSubtractGray(NULL, pixg1, pixg2); + pixb = pixSubtractGray(NULL, pixb1, pixb2); + } else { /* comptype == L_COMPARE_ABS_DIFF) */ + pixr = pixAbsDifference(pixr1, pixr2); + pixg = pixAbsDifference(pixg1, pixg2); + pixb = pixAbsDifference(pixb1, pixb2); + } + + pixZero(pixr, &rsame); + pixZero(pixg, &gsame); + pixZero(pixb, &bsame); + same = rsame && gsame && bsame; + if (same) + L_INFO("Images are pixel-wise identical\n", __func__); + if (psame) *psame = same; + + if (pdiff) { + pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_MEAN_ABSVAL, &rdiff); + pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_MEAN_ABSVAL, &gdiff); + pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_MEAN_ABSVAL, &bdiff); + *pdiff = (rdiff + gdiff + bdiff) / 3.0; + } + + /* Don't bother to plot if the images are the same */ + if (plottype && !same) { + L_INFO("Images differ: output plots will be generated\n", __func__); + nar = pixGetGrayHistogram(pixr, 1); + nag = pixGetGrayHistogram(pixg, 1); + nab = pixGetGrayHistogram(pixb, 1); + numaGetNonzeroRange(nar, TINY, &first, &rlast); + numaGetNonzeroRange(nag, TINY, &first, &glast); + numaGetNonzeroRange(nab, TINY, &first, &blast); + last = L_MAX(rlast, glast); + last = L_MAX(last, blast); + narc = numaClipToInterval(nar, 0, last); + nagc = numaClipToInterval(nag, 0, last); + nabc = numaClipToInterval(nab, 0, last); + snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d", index); + gplot = gplotCreate(buf, plottype, + "Pixel Difference Histogram", "diff val", + "number of pixels"); + gplotAddPlot(gplot, NULL, narc, GPLOT_LINES, "red"); + gplotAddPlot(gplot, NULL, nagc, GPLOT_LINES, "green"); + gplotAddPlot(gplot, NULL, nabc, GPLOT_LINES, "blue"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d.png", + index++); + l_fileDisplay(buf, 100, 100, 1.0); + numaDestroy(&nar); + numaDestroy(&nag); + numaDestroy(&nab); + numaDestroy(&narc); + numaDestroy(&nagc); + numaDestroy(&nabc); + } + + if (ppixdiff) + *ppixdiff = pixCreateRGBImage(pixr, pixg, pixb); + + if (prmsdiff) { + if (comptype == L_COMPARE_SUBTRACT) { + pixDestroy(&pixr); + pixDestroy(&pixg); + pixDestroy(&pixb); + pixr = pixAbsDifference(pixr1, pixr2); + pixg = pixAbsDifference(pixg1, pixg2); + pixb = pixAbsDifference(pixb1, pixb2); + } + pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &rdiff); + pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &gdiff); + pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &bdiff); + *prmsdiff = (rdiff + gdiff + bdiff) / 3.0; + } + + pixDestroy(&pixr1); + pixDestroy(&pixr2); + pixDestroy(&pixg1); + pixDestroy(&pixg2); + pixDestroy(&pixb1); + pixDestroy(&pixb2); + pixDestroy(&pixr); + pixDestroy(&pixg); + pixDestroy(&pixb); + return 0; +} + + +/*! + * \brief pixCompareTiled() + * + * \param[in] pix1 8 bpp or 32 bpp rgb + * \param[in] pix2 8 bpp 32 bpp rgb + * \param[in] sx, sy tile size; must be > 1 in each dimension + * \param[in] type L_MEAN_ABSVAL or L_ROOT_MEAN_SQUARE + * \param[out] ppixdiff pix of difference + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) With L_MEAN_ABSVAL, we compute for each tile the + * average abs value of the pixel component difference between + * the two (aligned) images. With L_ROOT_MEAN_SQUARE, we + * compute instead the rms difference over all components. + * (2) The two input pix must be the same depth. Comparison is made + * using UL corner alignment. + * (3) For 32 bpp, the distance between corresponding tiles + * is found by averaging the measured difference over all three + * components of each pixel in the tile. + * (4) The result, pixdiff, contains one pixel for each source tile. + * </pre> + */ +l_ok +pixCompareTiled(PIX *pix1, + PIX *pix2, + l_int32 sx, + l_int32 sy, + l_int32 type, + PIX **ppixdiff) +{ +l_int32 d1, d2, w, h; +PIX *pixt, *pixr, *pixg, *pixb; +PIX *pixrdiff, *pixgdiff, *pixbdiff; +PIXACC *pixacc; + + if (!ppixdiff) + return ERROR_INT("&pixdiff not defined", __func__, 1); + *ppixdiff = NULL; + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + d1 = pixGetDepth(pix1); + d2 = pixGetDepth(pix2); + if (d1 != d2) + return ERROR_INT("depths not equal", __func__, 1); + if (d1 != 8 && d1 != 32) + return ERROR_INT("pix1 not 8 or 32 bpp", __func__, 1); + if (d2 != 8 && d2 != 32) + return ERROR_INT("pix2 not 8 or 32 bpp", __func__, 1); + if (sx < 2 || sy < 2) + return ERROR_INT("sx and sy not both > 1", __func__, 1); + if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE) + return ERROR_INT("invalid type", __func__, 1); + + pixt = pixAbsDifference(pix1, pix2); + if (d1 == 8) { + *ppixdiff = pixGetAverageTiled(pixt, sx, sy, type); + } else { /* d1 == 32 */ + pixr = pixGetRGBComponent(pixt, COLOR_RED); + pixg = pixGetRGBComponent(pixt, COLOR_GREEN); + pixb = pixGetRGBComponent(pixt, COLOR_BLUE); + pixrdiff = pixGetAverageTiled(pixr, sx, sy, type); + pixgdiff = pixGetAverageTiled(pixg, sx, sy, type); + pixbdiff = pixGetAverageTiled(pixb, sx, sy, type); + pixGetDimensions(pixrdiff, &w, &h, NULL); + pixacc = pixaccCreate(w, h, 0); + pixaccAdd(pixacc, pixrdiff); + pixaccAdd(pixacc, pixgdiff); + pixaccAdd(pixacc, pixbdiff); + pixaccMultConst(pixacc, 1.f / 3.f); + *ppixdiff = pixaccFinal(pixacc, 8); + pixDestroy(&pixr); + pixDestroy(&pixg); + pixDestroy(&pixb); + pixDestroy(&pixrdiff); + pixDestroy(&pixgdiff); + pixDestroy(&pixbdiff); + pixaccDestroy(&pixacc); + } + pixDestroy(&pixt); + return 0; +} + + +/*------------------------------------------------------------------* + * Other measures of the difference of two images * + *------------------------------------------------------------------*/ +/*! + * \brief pixCompareRankDifference() + * + * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] factor subsampling factor; use 0 or 1 for no subsampling + * \return narank numa of rank difference, or NULL on error + * + * <pre> + * Notes: + * (1) This answers the question: if the pixel values in each + * component are compared by absolute difference, for + * any value of difference, what is the fraction of + * pixel pairs that have a difference of this magnitude + * or greater. For a difference of 0, the fraction is 1.0. + * In this sense, it is a mapping from pixel difference to + * rank order of difference. + * (2) The two images are aligned at the UL corner, and do not + * need to be the same size. If they are not the same size, + * the comparison will be made over overlapping pixels. + * (3) If there is a colormap, it is removed and the result + * is either gray or RGB depending on the colormap. + * (4) If RGB, pixel differences for each component are aggregated + * into a single histogram. + * </pre> + */ +NUMA * +pixCompareRankDifference(PIX *pix1, + PIX *pix2, + l_int32 factor) +{ +l_int32 i; +l_float32 *array1, *array2; +NUMA *nah, *nan, *nad; + + if (!pix1) + return (NUMA *)ERROR_PTR("pix1 not defined", __func__, NULL); + if (!pix2) + return (NUMA *)ERROR_PTR("pix2 not defined", __func__, NULL); + + if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL) + return (NUMA *)ERROR_PTR("na not made", __func__, NULL); + + nan = numaNormalizeHistogram(nah, 1.0); + array1 = numaGetFArray(nan, L_NOCOPY); + + nad = numaCreate(256); + numaSetCount(nad, 256); /* all initialized to 0.0 */ + array2 = numaGetFArray(nad, L_NOCOPY); + + /* Do rank accumulation on normalized histo of diffs */ + array2[0] = 1.0; + for (i = 1; i < 256; i++) + array2[i] = array2[i - 1] - array1[i - 1]; + + numaDestroy(&nah); + numaDestroy(&nan); + return nad; +} + + +/*! + * \brief pixTestForSimilarity() + * + * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] factor subsampling factor; use 0 or 1 for no subsampling + * \param[in] mindiff minimum pixel difference to be counted; > 0 + * \param[in] maxfract maximum fraction of pixels allowed to have + * diff greater than or equal to mindiff + * \param[in] maxave maximum average difference of pixels allowed for + * pixels with diff greater than or equal to + * mindiff, after subtracting mindiff + * \param[out] psimilar 1 if similar, 0 otherwise + * \param[in] details use 1 to give normalized histogram and other data + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This takes 2 pix that are the same size and determines using + * 3 input parameters if they are "similar". The first parameter + * %mindiff establishes a criterion of pixel-to-pixel similarity: + * two pixels are not similar if their difference in value is + * at least mindiff. Then %maxfract and %maxave are thresholds + * on the number and distribution of dissimilar pixels + * allowed for the two pix to be similar. If the pix are + * to be similar, neither threshold can be exceeded. + * (2) In setting the %maxfract and %maxave thresholds, you have + * these options: + * (a) Base the comparison only on %maxfract. Then set + * %maxave = 0.0 or 256.0. (If 0, we always ignore it.) + * (b) Base the comparison only on %maxave. Then set + * %maxfract = 1.0. + * (c) Base the comparison on both thresholds. + * (3) Example of values that can be expected at mindiff = 15 when + * comparing lossless png encoding with jpeg encoding, q=75: + * (smoothish bg) fractdiff = 0.01, avediff = 2.5 + * (natural scene) fractdiff = 0.13, avediff = 3.5 + * To identify these images as 'similar', select maxfract + * and maxave to be upper bounds of what you expect. + * (4) See pixGetDifferenceStats() for a discussion of why we subtract + * mindiff from the computed average diff of the nonsimilar pixels + * to get the 'avediff' returned by that function. + * (5) If there is a colormap, it is removed and the result + * is either gray or RGB depending on the colormap. + * (6) If RGB, the maximum difference between pixel components is + * saved in the histogram. + * </pre> + */ +l_ok +pixTestForSimilarity(PIX *pix1, + PIX *pix2, + l_int32 factor, + l_int32 mindiff, + l_float32 maxfract, + l_float32 maxave, + l_int32 *psimilar, + l_int32 details) +{ +l_float32 fractdiff, avediff; + + if (!psimilar) + return ERROR_INT("&similar not defined", __func__, 1); + *psimilar = 0; + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + if (pixSizesEqual(pix1, pix2) == 0) + return ERROR_INT("pix sizes not equal", __func__, 1); + if (mindiff <= 0) + return ERROR_INT("mindiff must be > 0", __func__, 1); + + if (pixGetDifferenceStats(pix1, pix2, factor, mindiff, + &fractdiff, &avediff, details)) + return ERROR_INT("diff stats not found", __func__, 1); + + if (maxave <= 0.0) maxave = 256.0; + if (fractdiff <= maxfract && avediff <= maxave) + *psimilar = 1; + return 0; +} + + +/*! + * \brief pixGetDifferenceStats() + * + * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] factor subsampling factor; use 0 or 1 for no subsampling + * \param[in] mindiff minimum pixel difference to be counted; > 0 + * \param[out] pfractdiff fraction of pixels with diff greater than or + * equal to mindiff + * \param[out] pavediff average difference of pixels with diff greater + * than or equal to mindiff, less mindiff + * \param[in] details use 1 to give normalized histogram and other data + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This takes a threshold %mindiff and describes the difference + * between two images in terms of two numbers: + * (a) the fraction of pixels, %fractdiff, whose difference + * equals or exceeds the threshold %mindiff, and + * (b) the average value %avediff of the difference in pixel value + * for the pixels in the set given by (a), after you subtract + * %mindiff. The reason for subtracting %mindiff is that + * you then get a useful measure for the rate of falloff + * of the distribution for larger differences. For example, + * if %mindiff = 10 and you find that %avediff = 2.5, it + * says that of the pixels with diff > 10, the average of + * their diffs is just mindiff + 2.5 = 12.5. This is a + * fast falloff in the histogram with increasing difference. + * (2) The two images are aligned at the UL corner, and do not + * need to be the same size. If they are not the same size, + * the comparison will be made over overlapping pixels. + * (3) If there is a colormap, it is removed and the result + * is either gray or RGB depending on the colormap. + * (4) If RGB, the maximum difference between pixel components is + * saved in the histogram. + * (5) Set %details == 1 to see the difference histogram and get + * an output that shows for each value of %mindiff, what are the + * minimum values required for fractdiff and avediff in order + * that the two pix will be considered similar. + * </pre> + */ +l_ok +pixGetDifferenceStats(PIX *pix1, + PIX *pix2, + l_int32 factor, + l_int32 mindiff, + l_float32 *pfractdiff, + l_float32 *pavediff, + l_int32 details) +{ +l_int32 i, first, last, diff; +l_float32 fract, ave; +l_float32 *array; +NUMA *nah, *nan, *nac; + + if (pfractdiff) *pfractdiff = 0.0; + if (pavediff) *pavediff = 0.0; + if (!pfractdiff) + return ERROR_INT("&fractdiff not defined", __func__, 1); + if (!pavediff) + return ERROR_INT("&avediff not defined", __func__, 1); + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + if (mindiff <= 0) + return ERROR_INT("mindiff must be > 0", __func__, 1); + + if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL) + return ERROR_INT("na not made", __func__, 1); + + if ((nan = numaNormalizeHistogram(nah, 1.0)) == NULL) { + numaDestroy(&nah); + return ERROR_INT("nan not made", __func__, 1); + } + array = numaGetFArray(nan, L_NOCOPY); + + if (details) { + lept_mkdir("lept/comp"); + numaGetNonzeroRange(nan, 0.0, &first, &last); + nac = numaClipToInterval(nan, first, last); + gplotSimple1(nac, GPLOT_PNG, "/tmp/lept/comp/histo", + "Difference histogram"); + l_fileDisplay("/tmp/lept/comp/histo.png", 500, 0, 1.0); + lept_stderr("\nNonzero values in normalized histogram:"); + numaWriteStderr(nac); + numaDestroy(&nac); + lept_stderr(" Mindiff fractdiff avediff\n"); + lept_stderr(" -----------------------------------\n"); + for (diff = 1; diff < L_MIN(2 * mindiff, last); diff++) { + fract = 0.0; + ave = 0.0; + for (i = diff; i <= last; i++) { + fract += array[i]; + ave += (l_float32)i * array[i]; + } + ave = (fract == 0.0) ? 0.0 : ave / fract; + ave -= diff; + lept_stderr("%5d %7.4f %7.4f\n", + diff, fract, ave); + } + lept_stderr(" -----------------------------------\n"); + } + + fract = 0.0; + ave = 0.0; + for (i = mindiff; i < 256; i++) { + fract += array[i]; + ave += (l_float32)i * array[i]; + } + ave = (fract == 0.0) ? 0.0 : ave / fract; + ave -= mindiff; + + *pfractdiff = fract; + *pavediff = ave; + + numaDestroy(&nah); + numaDestroy(&nan); + return 0; +} + + +/*! + * \brief pixGetDifferenceHistogram() + * + * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] factor subsampling factor; use 0 or 1 for no subsampling + * \return na Numa of histogram of differences, or NULL on error + * + * <pre> + * Notes: + * (1) The two images are aligned at the UL corner, and do not + * need to be the same size. If they are not the same size, + * the comparison will be made over overlapping pixels. + * (2) If there is a colormap, it is removed and the result + * is either gray or RGB depending on the colormap. + * (3) If RGB, the maximum difference between pixel components is + * saved in the histogram. + * </pre> + */ +NUMA * +pixGetDifferenceHistogram(PIX *pix1, + PIX *pix2, + l_int32 factor) +{ +l_int32 w1, h1, d1, w2, h2, d2, w, h, wpl1, wpl2; +l_int32 i, j, val, val1, val2; +l_int32 rval1, rval2, gval1, gval2, bval1, bval2; +l_int32 rdiff, gdiff, bdiff, maxdiff; +l_uint32 *data1, *data2, *line1, *line2; +l_float32 *array; +NUMA *na; +PIX *pixt1, *pixt2; + + if (!pix1) + return (NUMA *)ERROR_PTR("pix1 not defined", __func__, NULL); + if (!pix2) + return (NUMA *)ERROR_PTR("pix2 not defined", __func__, NULL); + d1 = pixGetDepth(pix1); + d2 = pixGetDepth(pix2); + if (d1 == 16 || d2 == 16) + return (NUMA *)ERROR_PTR("d == 16 not supported", __func__, NULL); + if (d1 < 8 && !pixGetColormap(pix1)) + return (NUMA *)ERROR_PTR("pix1 depth < 8 bpp and not cmapped", + __func__, NULL); + if (d2 < 8 && !pixGetColormap(pix2)) + return (NUMA *)ERROR_PTR("pix2 depth < 8 bpp and not cmapped", + __func__, NULL); + pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); + pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC); + pixGetDimensions(pixt1, &w1, &h1, &d1); + pixGetDimensions(pixt2, &w2, &h2, &d2); + if (d1 != d2) { + pixDestroy(&pixt1); + pixDestroy(&pixt2); + return (NUMA *)ERROR_PTR("pix depths not equal", __func__, NULL); + } + if (factor < 1) factor = 1; + + na = numaCreate(256); + numaSetCount(na, 256); /* all initialized to 0.0 */ + array = numaGetFArray(na, L_NOCOPY); + w = L_MIN(w1, w2); + h = L_MIN(h1, h2); + data1 = pixGetData(pixt1); + data2 = pixGetData(pixt2); + wpl1 = pixGetWpl(pixt1); + wpl2 = pixGetWpl(pixt2); + if (d1 == 8) { + for (i = 0; i < h; i += factor) { + line1 = data1 + i * wpl1; + line2 = data2 + i * wpl2; + for (j = 0; j < w; j += factor) { + val1 = GET_DATA_BYTE(line1, j); + val2 = GET_DATA_BYTE(line2, j); + val = L_ABS(val1 - val2); + array[val]++; + } + } + } else { /* d1 == 32 */ + for (i = 0; i < h; i += factor) { + line1 = data1 + i * wpl1; + line2 = data2 + i * wpl2; + for (j = 0; j < w; j += factor) { + extractRGBValues(line1[j], &rval1, &gval1, &bval1); + extractRGBValues(line2[j], &rval2, &gval2, &bval2); + rdiff = L_ABS(rval1 - rval2); + gdiff = L_ABS(gval1 - gval2); + bdiff = L_ABS(bval1 - bval2); + maxdiff = L_MAX(rdiff, gdiff); + maxdiff = L_MAX(maxdiff, bdiff); + array[maxdiff]++; + } + } + } + + pixDestroy(&pixt1); + pixDestroy(&pixt2); + return na; +} + + +/*! + * \brief pixGetPerceptualDiff() + * + * \param[in] pixs1 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] pixs2 8 bpp gray or 32 bpp rgb, or colormapped + * \param[in] sampling subsampling factor; use 0 or 1 for no subsampling + * \param[in] dilation size of grayscale or color Sel; odd + * \param[in] mindiff minimum pixel difference to be counted; > 0 + * \param[out] pfract fraction of pixels with diff greater than mindiff + * \param[out] ppixdiff1 [optional] showing difference (gray or color) + * \param[out] ppixdiff2 [optional] showing pixels of sufficient diff + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This takes 2 pix and determines, using 2 input parameters: + * * %dilation specifies the amount of grayscale or color + * dilation to apply to the images, to compensate for + * a small amount of misregistration. A typical number might + * be 5, which uses a 5x5 Sel. Grayscale dilation expands + * lighter pixels into darker pixel regions. + * * %mindiff determines the threshold on the difference in + * pixel values to be counted -- two pixels are not similar + * if their difference in value is at least %mindiff. For + * color pixels, we use the maximum component difference. + * (2) The pixelwise comparison is always done with the UL corners + * aligned. The sizes of pix1 and pix2 need not be the same, + * although in practice it can be useful to scale to the same size. + * (3) If there is a colormap, it is removed and the result + * is either gray or RGB depending on the colormap. + * (4) Two optional diff images can be retrieved (typ. for debugging): + * pixdiff1: the gray or color difference + * pixdiff2: thresholded to 1 bpp for pixels exceeding %mindiff + * (5) The returned value of fract can be compared to some threshold, + * which is application dependent. + * (6) This method is in analogy to the two-sided hausdorff transform, + * except here it is for d > 1. For d == 1 (see pixRankHaustest()), + * we verify that when one pix1 is dilated, it covers at least a + * given fraction of the pixels in pix2, and v.v.; in that + * case, the two pix are sufficiently similar. Here, we + * do an analogous thing: subtract the dilated pix1 from pix2 to + * get a 1-sided hausdorff-like transform. Then do it the + * other way. Take the component-wise max of the two results, + * and threshold to get the fraction of pixels with a difference + * below the threshold. + * </pre> + */ +l_ok +pixGetPerceptualDiff(PIX *pixs1, + PIX *pixs2, + l_int32 sampling, + l_int32 dilation, + l_int32 mindiff, + l_float32 *pfract, + PIX **ppixdiff1, + PIX **ppixdiff2) +{ +l_int32 d1, d2, w, h, count; +PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; +PIX *pix10, *pix11; + + if (ppixdiff1) *ppixdiff1 = NULL; + if (ppixdiff2) *ppixdiff2 = NULL; + if (!pfract) + return ERROR_INT("&fract not defined", __func__, 1); + *pfract = 1.0; /* init to completely different */ + if ((dilation & 1) == 0) + return ERROR_INT("dilation must be odd", __func__, 1); + if (!pixs1) + return ERROR_INT("pixs1 not defined", __func__, 1); + if (!pixs2) + return ERROR_INT("pixs2 not defined", __func__, 1); + d1 = pixGetDepth(pixs1); + d2 = pixGetDepth(pixs2); + if (!pixGetColormap(pixs1) && d1 < 8) + return ERROR_INT("pixs1 not cmapped and < 8 bpp", __func__, 1); + if (!pixGetColormap(pixs2) && d2 < 8) + return ERROR_INT("pixs2 not cmapped and < 8 bpp", __func__, 1); + + /* Integer downsample if requested */ + if (sampling > 1) { + pix1 = pixScaleByIntSampling(pixs1, sampling); + pix2 = pixScaleByIntSampling(pixs2, sampling); + } else { + pix1 = pixClone(pixs1); + pix2 = pixClone(pixs2); + } + + /* Remove colormaps */ + if (pixGetColormap(pix1)) { + pix3 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC); + d1 = pixGetDepth(pix3); + } else { + pix3 = pixClone(pix1); + } + if (pixGetColormap(pix2)) { + pix4 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC); + d2 = pixGetDepth(pix4); + } else { + pix4 = pixClone(pix2); + } + pixDestroy(&pix1); + pixDestroy(&pix2); + if (d1 != d2 || (d1 != 8 && d1 != 32)) { + pixDestroy(&pix3); + pixDestroy(&pix4); + L_INFO("depths unequal or not in {8,32}: d1 = %d, d2 = %d\n", + __func__, d1, d2); + return 1; + } + + /* In each direction, do a small dilation and subtract the dilated + * image from the other image to get a one-sided difference. + * Then take the max of the differences for each direction + * and clipping each component to 255 if necessary. Note that + * for RGB images, the dilations and max selection are done + * component-wise, and the conversion to grayscale also uses the + * maximum component. The resulting grayscale images are + * thresholded using %mindiff. */ + if (d1 == 8) { + pix5 = pixDilateGray(pix3, dilation, dilation); + pixCompareGray(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, + &pix7); + pix6 = pixDilateGray(pix4, dilation, dilation); + pixCompareGray(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, + &pix8); + pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX); + pix10 = pixThresholdToBinary(pix9, mindiff); + pixInvert(pix10, pix10); + pixCountPixels(pix10, &count, NULL); + pixGetDimensions(pix10, &w, &h, NULL); + *pfract = (w <= 0 || h <= 0) ? 0.0 : + (l_float32)count / (l_float32)(w * h); + pixDestroy(&pix5); + pixDestroy(&pix6); + pixDestroy(&pix7); + pixDestroy(&pix8); + if (ppixdiff1) + *ppixdiff1 = pix9; + else + pixDestroy(&pix9); + if (ppixdiff2) + *ppixdiff2 = pix10; + else + pixDestroy(&pix10); + } else { /* d1 == 32 */ + pix5 = pixColorMorph(pix3, L_MORPH_DILATE, dilation, dilation); + pixCompareRGB(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, + &pix7); + pix6 = pixColorMorph(pix4, L_MORPH_DILATE, dilation, dilation); + pixCompareRGB(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL, + &pix8); + pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX); + pix10 = pixConvertRGBToGrayMinMax(pix9, L_CHOOSE_MAX); + pix11 = pixThresholdToBinary(pix10, mindiff); + pixInvert(pix11, pix11); + pixCountPixels(pix11, &count, NULL); + pixGetDimensions(pix11, &w, &h, NULL); + *pfract = (w <= 0 || h <= 0) ? 0.0 : + (l_float32)count / (l_float32)(w * h); + pixDestroy(&pix5); + pixDestroy(&pix6); + pixDestroy(&pix7); + pixDestroy(&pix8); + pixDestroy(&pix10); + if (ppixdiff1) + *ppixdiff1 = pix9; + else + pixDestroy(&pix9); + if (ppixdiff2) + *ppixdiff2 = pix11; + else + pixDestroy(&pix11); + + } + pixDestroy(&pix3); + pixDestroy(&pix4); + return 0; +} + + +/*! + * \brief pixGetPSNR() + * + * \param[in] pix1, pix2 8 or 32 bpp; no colormap + * \param[in] factor sampling factor; >= 1 + * \param[out] ppsnr power signal/noise ratio difference + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This computes the power S/N ratio, in dB, for the difference + * between two images. By convention, the power S/N + * for a grayscale image is ('log' == log base 10, + * and 'ln == log base e): + * PSNR = 10 * log((255/MSE)^2) + * = 4.3429 * ln((255/MSE)^2) + * = -4.3429 * ln((MSE/255)^2) + * where MSE is the mean squared error. + * Here are some examples: + * MSE PSNR + * --- ---- + * 10 28.1 + * 3 38.6 + * 1 48.1 + * 0.1 68.1 + * (2) If pix1 and pix2 have the same pixel values, the MSE = 0.0 + * and the PSNR is infinity. For that case, this returns + * PSNR = 1000, which corresponds to the very small MSE of + * about 10^(-48). + * </pre> + */ +l_ok +pixGetPSNR(PIX *pix1, + PIX *pix2, + l_int32 factor, + l_float32 *ppsnr) +{ +l_int32 same, i, j, w, h, d, wpl1, wpl2, v1, v2, r1, g1, b1, r2, g2, b2; +l_uint32 *data1, *data2, *line1, *line2; +l_float32 mse; /* mean squared error */ + + if (!ppsnr) + return ERROR_INT("&psnr not defined", __func__, 1); + *ppsnr = 0.0; + if (!pix1 || !pix2) + return ERROR_INT("empty input pix", __func__, 1); + if (!pixSizesEqual(pix1, pix2)) + return ERROR_INT("pix sizes unequal", __func__, 1); + if (pixGetColormap(pix1)) + return ERROR_INT("pix1 has colormap", __func__, 1); + if (pixGetColormap(pix2)) + return ERROR_INT("pix2 has colormap", __func__, 1); + pixGetDimensions(pix1, &w, &h, &d); + if (d != 8 && d != 32) + return ERROR_INT("pix not 8 or 32 bpp", __func__, 1); + if (factor < 1) + return ERROR_INT("invalid sampling factor", __func__, 1); + + pixEqual(pix1, pix2, &same); + if (same) { + *ppsnr = 1000.0; /* crazy big exponent */ + return 0; + } + + data1 = pixGetData(pix1); + data2 = pixGetData(pix2); + wpl1 = pixGetWpl(pix1); + wpl2 = pixGetWpl(pix2); + mse = 0.0; + if (d == 8) { + for (i = 0; i < h; i += factor) { + line1 = data1 + i * wpl1; + line2 = data2 + i * wpl2; + for (j = 0; j < w; j += factor) { + v1 = GET_DATA_BYTE(line1, j); + v2 = GET_DATA_BYTE(line2, j); + mse += (l_float32)(v1 - v2) * (v1 - v2); + } + } + } else { /* d == 32 */ + for (i = 0; i < h; i += factor) { + line1 = data1 + i * wpl1; + line2 = data2 + i * wpl2; + for (j = 0; j < w; j += factor) { + extractRGBValues(line1[j], &r1, &g1, &b1); + extractRGBValues(line2[j], &r2, &g2, &b2); + mse += ((l_float32)(r1 - r2) * (r1 - r2) + + (g1 - g2) * (g1 - g2) + + (b1 - b2) * (b1 - b2)) / 3.0; + } + } + } + mse = mse / ((l_float32)(w) * h); + + *ppsnr = -4.3429448 * log(mse / (255 * 255)); + return 0; +} + + +/*------------------------------------------------------------------* + * Comparison of photo regions by histogram * + *------------------------------------------------------------------*/ +/*! + * \brief pixaComparePhotoRegionsByHisto() + * + * \param[in] pixa any depth; colormap OK + * \param[in] minratio requiring sizes be compatible; < 1.0 + * \param[in] textthresh threshold for text/photo; use 0 for default + * \param[in] factor subsampling; >= 1 + * \param[in] n in range {1, ... 7}. n^2 is the maximum number + * of subregions for histograms; typ. n = 3. + * \param[in] simthresh threshold for similarity; use 0 for default + * \param[out] pnai array giving similarity class indices + * \param[out] pscores [optional] score matrix as 1-D array of size N^2 + * \param[out] ppixd [optional] pix of similarity classes + * \param[in] debug 1 to output histograms; 0 otherwise + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This function takes a pixa of cropped photo images and + * compares each one to the others for similarity. + * Each image is first tested to see if it is a photo that can + * be compared by tiled histograms. If so, it is padded to put + * the centroid in the center of the image, and the histograms + * are generated. The final step of comparing each histogram + * with all the others is very fast. + * (2) To make the histograms, each image is subdivided in a maximum + * of n^2 subimages. The parameter %n specifies the "side" of + * an n x n grid of such subimages. If the subimages have an + * aspect ratio larger than 2, the grid will change, again using n^2 + * as a maximum for the number of subimages. For example, + * if n == 3, but the image is 600 x 200 pixels, a 3x3 grid + * would have subimages of 200 x 67 pixels, which is more + * than 2:1, so we change to a 4x2 grid where each subimage + * has 150 x 100 pixels. + * (3) An initial filter gives %score = 0 if the ratio of widths + * and heights (smallest / largest) does not exceed a + * threshold %minratio. If set at 1.0, both images must be + * exactly the same size. A typical value for %minratio is 0.9. + * (4) The comparison score between two images is a value in [0.0 .. 1.0]. + * If the comparison score >= %simthresh, the images are placed in + * the same similarity class. Default value for %simthresh is 0.25. + * (5) An array %nai of similarity class indices for pix in the + * input pixa is returned. + * (6) There are two debugging options: + * * An optional 2D matrix of scores is returned as a 1D array. + * A visualization of this is written to a temp file. + * * An optional pix showing the similarity classes can be + * returned. Text in each input pix is reproduced. + * (7) See the notes in pixComparePhotoRegionsByHisto() for details + * on the implementation. + * </pre> + */ +l_ok +pixaComparePhotoRegionsByHisto(PIXA *pixa, + l_float32 minratio, + l_float32 textthresh, + l_int32 factor, + l_int32 n, + l_float32 simthresh, + NUMA **pnai, + l_float32 **pscores, + PIX **ppixd, + l_int32 debug) +{ +char *text; +l_int32 i, j, nim, w, h, w1, h1, w2, h2, ival, index, classid; +l_float32 score; +l_float32 *scores; +NUMA *nai, *naw, *nah; +NUMAA *naa; +NUMAA **n3a; /* array of naa */ +PIX *pix; + + if (pscores) *pscores = NULL; + if (ppixd) *ppixd = NULL; + if (!pnai) + return ERROR_INT("&na not defined", __func__, 1); + *pnai = NULL; + if (!pixa) + return ERROR_INT("pixa not defined", __func__, 1); + if (minratio < 0.0 || minratio > 1.0) + return ERROR_INT("minratio not in [0.0 ... 1.0]", __func__, 1); + if (textthresh <= 0.0) textthresh = 1.3f; + if (factor < 1) + return ERROR_INT("subsampling factor must be >= 1", __func__, 1); + if (n < 1 || n > 7) { + L_WARNING("n = %d is invalid; setting to 4\n", __func__, n); + n = 4; + } + if (simthresh <= 0.0) simthresh = 0.25; + if (simthresh > 1.0) + return ERROR_INT("simthresh invalid; should be near 0.25", __func__, 1); + + /* Prepare the histograms */ + nim = pixaGetCount(pixa); + if ((n3a = (NUMAA **)LEPT_CALLOC(nim, sizeof(NUMAA *))) == NULL) + return ERROR_INT("calloc fail for n3a", __func__, 1); + naw = numaCreate(0); + nah = numaCreate(0); + for (i = 0; i < nim; i++) { + pix = pixaGetPix(pixa, i, L_CLONE); + text = pixGetText(pix); + pixSetResolution(pix, 150, 150); + index = (debug) ? i : 0; + pixGenPhotoHistos(pix, NULL, factor, textthresh, n, + &naa, &w, &h, index); + n3a[i] = naa; + numaAddNumber(naw, w); + numaAddNumber(nah, h); + if (naa) + lept_stderr("Image %s is photo\n", text); + else + lept_stderr("Image %s is NOT photo\n", text); + pixDestroy(&pix); + } + + /* Do the comparisons. We are making a set of classes, where + * all similar images are placed in the same class. There are + * 'nim' input images. The classes are labeled by 'classid' (all + * similar images get the same 'classid' value), and 'nai' maps + * the classid of the image in the input array to the classid + * of the similarity class. */ + if ((scores = + (l_float32 *)LEPT_CALLOC((size_t)nim * nim, sizeof(l_float32))) + == NULL) { + L_ERROR("calloc fail for scores\n", __func__); + goto cleanup; + } + nai = numaMakeConstant(-1, nim); /* classid array */ + for (i = 0, classid = 0; i < nim; i++) { + scores[nim * i + i] = 1.0; + numaGetIValue(nai, i, &ival); + if (ival != -1) /* already set */ + continue; + numaSetValue(nai, i, classid); + if (n3a[i] == NULL) { /* not a photo */ + classid++; + continue; + } + numaGetIValue(naw, i, &w1); + numaGetIValue(nah, i, &h1); + for (j = i + 1; j < nim; j++) { + numaGetIValue(nai, j, &ival); + if (ival != -1) /* already set */ + continue; + if (n3a[j] == NULL) /* not a photo */ + continue; + numaGetIValue(naw, j, &w2); + numaGetIValue(nah, j, &h2); + compareTilesByHisto(n3a[i], n3a[j], minratio, w1, h1, w2, h2, + &score, NULL); + scores[nim * i + j] = score; + scores[nim * j + i] = score; /* the score array is symmetric */ +/* lept_stderr("score = %5.3f\n", score); */ + if (score > simthresh) { + numaSetValue(nai, j, classid); + lept_stderr( + "Setting %d similar to %d, in class %d; score %5.3f\n", + j, i, classid, score); + } + } + classid++; + } + *pnai = nai; + + /* Debug: optionally save and display the score array. + * All images that are photos are represented by a point on + * the diagonal. Other images in the same similarity class + * are on the same horizontal raster line to the right. + * The array has been symmetrized, so images in the same + * same similarity class also appear on the same column below. */ + if (pscores) { + l_int32 wpl, fact; + l_uint32 *line, *data; + PIX *pix2, *pix3; + pix2 = pixCreate(nim, nim, 8); + data = pixGetData(pix2); + wpl = pixGetWpl(pix2); + for (i = 0; i < nim; i++) { + line = data + i * wpl; + for (j = 0; j < nim; j++) { + SET_DATA_BYTE(line, j, + L_MIN(255, 4.0 * 255 * scores[nim * i + j])); + } + } + fact = L_MAX(2, 1000 / nim); + pix3 = pixExpandReplicate(pix2, fact); + lept_stderr("Writing to /tmp/lept/comp/scorearray.png\n"); + lept_mkdir("lept/comp"); + pixWrite("/tmp/lept/comp/scorearray.png", pix3, IFF_PNG); + pixDestroy(&pix2); + pixDestroy(&pix3); + *pscores = scores; + } else { + LEPT_FREE(scores); + } + + /* Debug: optionally display and save the image comparisons. + * Image similarity classes are displayed by column; similar + * images are displayed in the same column. */ + if (ppixd) + *ppixd = pixaDisplayTiledByIndex(pixa, nai, 200, 20, 2, 6, 0x0000ff00); + +cleanup: + numaDestroy(&naw); + numaDestroy(&nah); + for (i = 0; i < nim; i++) + numaaDestroy(&n3a[i]); + LEPT_FREE(n3a); + return 0; +} + + +/*! + * \brief pixComparePhotoRegionsByHisto() + * + * \param[in] pix1, pix2 any depth; colormap OK + * \param[in] box1, box2 [optional] photo regions from each; can be null + * \param[in] minratio requiring sizes be compatible; < 1.0 + * \param[in] factor subsampling factor; >= 1 + * \param[in] n in range {1, ... 7}. n^2 is the maximum number + * of subregions for histograms; typ. n = 3. + * \param[out] pscore similarity score of histograms + * \param[in] debugflag 1 for debug output; 0 for no debugging + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This function compares two grayscale photo regions. If a + * box is given, the region is clipped; otherwise assume + * the entire images are photo regions. This is done with a + * set of not more than n^2 spatially aligned histograms, which are + * aligned using the centroid of the inverse image. + * (2) The parameter %n specifies the "side" of an n x n grid + * of subimages. If the subimages have an aspect ratio larger + * than 2, the grid will change, using n^2 as a maximum for + * the number of subimages. For example, if n == 3, but the + * image is 600 x 200 pixels, a 3x3 grid would have subimages + * of 200 x 67 pixels, which is more than 2:1, so we change + * to a 4x2 grid where each subimage has 150 x 100 pixels. + * (3) An initial filter gives %score = 0 if the ratio of widths + * and heights (smallest / largest) does not exceed a + * threshold %minratio. This must be between 0.5 and 1.0. + * If set at 1.0, both images must be exactly the same size. + * A typical value for %minratio is 0.9. + * (4) Because this function should not be used on text or + * line graphics, which can give false positive results + * (i.e., high scores for different images), filter the images + * using pixGenPhotoHistos(), which returns tiled histograms + * only if an image is not text and comparison is expected + * to work with histograms. If either image fails the test, + * the comparison returns a score of 0.0. + * (5) The white value counts in the histograms are removed; they + * are typically pixels that were padded to achieve alignment. + * (6) For an efficient representation of the histogram, normalize + * using a multiplicative factor so that the number in the + * maximum bucket is 255. It then takes 256 bytes to store. + * (7) When comparing the histograms of two regions, use the + * Earth Mover distance (EMD), with the histograms normalized + * so that the sum over bins is the same. Further normalize + * by dividing by 255, so that the result is in [0.0 ... 1.0]. + * (8) Get a similarity score S = 1.0 - k * D, where + * k is a constant, say in the range 5-10 + * D = normalized EMD + * and for multiple tiles, take the Min(S) to be the final score. + * Using aligned tiles gives protection against accidental + * similarity of the overall grayscale histograms. + * A small number of aligned tiles works well. + * (9) With debug on, you get a pdf that shows, for each tile, + * the images, histograms and score. + * </pre> + */ +l_ok +pixComparePhotoRegionsByHisto(PIX *pix1, + PIX *pix2, + BOX *box1, + BOX *box2, + l_float32 minratio, + l_int32 factor, + l_int32 n, + l_float32 *pscore, + l_int32 debugflag) +{ +l_int32 w1, h1, w2, h2, w1c, h1c, w2c, h2c, debugindex; +l_float32 wratio, hratio; +NUMAA *naa1, *naa2; +PIX *pix3, *pix4; +PIXA *pixa; + + if (!pscore) + return ERROR_INT("&score not defined", __func__, 1); + *pscore = 0.0; + if (!pix1 || !pix2) + return ERROR_INT("pix1 and pix2 not both defined", __func__, 1); + if (minratio < 0.5 || minratio > 1.0) + return ERROR_INT("minratio not in [0.5 ... 1.0]", __func__, 1); + if (factor < 1) + return ERROR_INT("subsampling factor must be >= 1", __func__, 1); + if (n < 1 || n > 7) { + L_WARNING("n = %d is invalid; setting to 4\n", __func__, n); + n = 4; + } + + debugindex = 0; + if (debugflag) { + lept_mkdir("lept/comp"); + debugindex = 666; /* arbitrary number used for naming output */ + } + + /* Initial filter by size */ + if (box1) + boxGetGeometry(box1, NULL, NULL, &w1, &h1); + else + pixGetDimensions(pix1, &w1, &h1, NULL); + if (box2) + boxGetGeometry(box2, NULL, NULL, &w2, &h2); + else + pixGetDimensions(pix1, &w2, &h2, NULL); + wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 : + (l_float32)w2 / (l_float32)w1; + hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 : + (l_float32)h2 / (l_float32)h1; + if (wratio < minratio || hratio < minratio) + return 0; + + /* Initial crop, if necessary, and make histos */ + if (box1) + pix3 = pixClipRectangle(pix1, box1, NULL); + else + pix3 = pixClone(pix1); + pixGenPhotoHistos(pix3, NULL, factor, 0, n, &naa1, &w1c, &h1c, debugindex); + pixDestroy(&pix3); + if (!naa1) return 0; + if (box2) + pix4 = pixClipRectangle(pix2, box2, NULL); + else + pix4 = pixClone(pix2); + pixGenPhotoHistos(pix4, NULL, factor, 0, n, &naa2, &w2c, &h2c, debugindex); + pixDestroy(&pix4); + if (!naa2) return 0; + + /* Compare histograms */ + pixa = (debugflag) ? pixaCreate(0) : NULL; + compareTilesByHisto(naa1, naa2, minratio, w1c, h1c, w2c, h2c, pscore, pixa); + pixaDestroy(&pixa); + return 0; +} + + +/*! + * \brief pixGenPhotoHistos() + * + * \param[in] pixs depth > 1 bpp; colormap OK + * \param[in] box [optional] region to be selected; can be null + * \param[in] factor subsampling; >= 1 + * \param[in] thresh threshold for photo/text; use 0 for default + * \param[in] n in range {1, ... 7}. n^2 is the maximum number + * of subregions for histograms; typ. n = 3. + * \param[out] pnaa nx * ny 256-entry gray histograms + * \param[out] pw width of image used to make histograms + * \param[out] ph height of image used to make histograms + * \param[in] debugindex 0 for no debugging; positive integer otherwise + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This crops and converts to 8 bpp if necessary. It adds a + * minimal white boundary such that the centroid of the + * photo-inverted image is in the center. This allows + * automatic alignment with histograms of other image regions. + * (2) The parameter %n specifies the "side" of the n x n grid + * of subimages. If the subimages have an aspect ratio larger + * than 2, the grid will change, using n^2 as a maximum for + * the number of subimages. For example, if n == 3, but the + * image is 600 x 200 pixels, a 3x3 grid would have subimages + * of 200 x 67 pixels, which is more than 2:1, so we change + * to a 4x2 grid where each subimage has 150 x 100 pixels. + * (3) The white value in the histogram is removed, because of + * the padding. + * (4) Use 0 for conservative default (1.3) for thresh. + * (5) For an efficient representation of the histogram, normalize + * using a multiplicative factor so that the number in the + * maximum bucket is 255. It then takes 256 bytes to store. + * (6) With %debugindex > 0, this makes a pdf that shows, for each tile, + * the images and histograms. + * </pre> + */ +l_ok +pixGenPhotoHistos(PIX *pixs, + BOX *box, + l_int32 factor, + l_float32 thresh, + l_int32 n, + NUMAA **pnaa, + l_int32 *pw, + l_int32 *ph, + l_int32 debugindex) +{ +char buf[64]; +NUMAA *naa; +PIX *pix1, *pix2, *pix3, *pixm; +PIXA *pixa; + + if (pnaa) *pnaa = NULL; + if (pw) *pw = 0; + if (ph) *ph = 0; + if (!pnaa) + return ERROR_INT("&naa not defined", __func__, 1); + if (!pw || !ph) + return ERROR_INT("&w and &h not both defined", __func__, 1); + if (!pixs || pixGetDepth(pixs) == 1) + return ERROR_INT("pixs not defined or 1 bpp", __func__, 1); + if (factor < 1) + return ERROR_INT("subsampling factor must be >= 1", __func__, 1); + if (thresh <= 0.0) thresh = 1.3f; /* default */ + if (n < 1 || n > 7) { + L_WARNING("n = %d is invalid; setting to 4\n", __func__, n); + n = 4; + } + + pixa = NULL; + if (debugindex > 0) { + pixa = pixaCreate(0); + lept_mkdir("lept/comp"); + } + + /* Initial crop, if necessary */ + if (box) + pix1 = pixClipRectangle(pixs, box, NULL); + else + pix1 = pixClone(pixs); + + /* Convert to 8 bpp and pad to center the centroid */ + pix2 = pixConvertTo8(pix1, FALSE); + pix3 = pixPadToCenterCentroid(pix2, factor); + + /* Set to 255 all pixels above 230. Do this so that light gray + * pixels do not enter into the comparison. */ + pixm = pixThresholdToBinary(pix3, 230); + pixInvert(pixm, pixm); + pixSetMaskedGeneral(pix3, pixm, 255, 0, 0); + pixDestroy(&pixm); + + if (debugindex > 0) { + PIX *pix4, *pix5, *pix6, *pix7, *pix8; + PIXA *pixa2; + pix4 = pixConvertTo32(pix2); + pix5 = pixConvertTo32(pix3); + pix6 = pixScaleToSize(pix4, 400, 0); + pix7 = pixScaleToSize(pix5, 400, 0); + pixa2 = pixaCreate(2); + pixaAddPix(pixa2, pix6, L_INSERT); + pixaAddPix(pixa2, pix7, L_INSERT); + pix8 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 3); + pixaAddPix(pixa, pix8, L_INSERT); + pixDestroy(&pix4); + pixDestroy(&pix5); + pixaDestroy(&pixa2); + } + pixDestroy(&pix1); + pixDestroy(&pix2); + + /* Test if this is a photoimage */ + pixDecideIfPhotoImage(pix3, factor, thresh, n, &naa, pixa); + if (naa) { + *pnaa = naa; + *pw = pixGetWidth(pix3); + *ph = pixGetHeight(pix3); + } + + if (pixa) { + snprintf(buf, sizeof(buf), "/tmp/lept/comp/tiledhistos.%d.pdf", + debugindex); + lept_stderr("Writing to %s\n", buf); + pixaConvertToPdf(pixa, 300, 1.0, L_FLATE_ENCODE, 0, NULL, buf); + pixaDestroy(&pixa); + } + + pixDestroy(&pix3); + return 0; +} + + +/*! + * \brief pixPadToCenterCentroid() + * + * \param[in] pixs any depth, colormap OK + * \param[in] factor subsampling for centroid; >= 1 + * \return pixd padded with white pixels, or NULL on error. + * + * <pre> + * Notes: + * (1) This add minimum white padding to an 8 bpp pix, such that + * the centroid of the photometric inverse is in the center of + * the resulting image. Thus in computing the centroid, + * black pixels have weight 255, and white pixels have weight 0. + * </pre> + */ +PIX * +pixPadToCenterCentroid(PIX *pixs, + l_int32 factor) + +{ +l_float32 cx, cy; +l_int32 xs, ys, delx, dely, icx, icy, ws, hs, wd, hd; +PIX *pix1, *pixd; + + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + if (factor < 1) + return (PIX *)ERROR_PTR("invalid sampling factor", __func__, NULL); + + pix1 = pixConvertTo8(pixs, FALSE); + pixCentroid8(pix1, factor, &cx, &cy); + icx = (l_int32)(cx + 0.5); + icy = (l_int32)(cy + 0.5); + pixGetDimensions(pix1, &ws, &hs, NULL); + delx = ws - 2 * icx; + dely = hs - 2 * icy; + xs = L_MAX(0, delx); + ys = L_MAX(0, dely); + wd = 2 * L_MAX(icx, ws - icx); + hd = 2 * L_MAX(icy, hs - icy); + pixd = pixCreate(wd, hd, 8); + pixSetAll(pixd); /* to white */ + pixCopyResolution(pixd, pixs); + pixRasterop(pixd, xs, ys, ws, hs, PIX_SRC, pix1, 0, 0); + pixDestroy(&pix1); + return pixd; +} + + +/*! + * \brief pixCentroid8() + * + * \param[in] pixs 8 bpp + * \param[in] factor subsampling factor; >= 1 + * \param[out] pcx x value of centroid + * \param[out] pcy y value of centroid + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This first does a photometric inversion (black = 255, white = 0). + * It then finds the centroid of the result. The inversion is + * done because white is usually background, so the centroid + * is computed based on the "foreground" gray pixels, and the + * darker the pixel, the more weight it is given. + * </pre> + */ +l_ok +pixCentroid8(PIX *pixs, + l_int32 factor, + l_float32 *pcx, + l_float32 *pcy) +{ +l_int32 i, j, w, h, wpl, val; +l_float32 sumx, sumy, sumv; +l_uint32 *data, *line; +PIX *pix1; + + if (pcx) *pcx = 0.0; + if (pcy) *pcy = 0.0; + if (!pixs || pixGetDepth(pixs) != 8) + return ERROR_INT("pixs undefined or not 8 bpp", __func__, 1); + if (factor < 1) + return ERROR_INT("subsampling factor must be >= 1", __func__, 1); + if (!pcx || !pcy) + return ERROR_INT("&cx and &cy not both defined", __func__, 1); + + pix1 = pixInvert(NULL, pixs); + pixGetDimensions(pix1, &w, &h, NULL); + data = pixGetData(pix1); + wpl = pixGetWpl(pix1); + sumx = sumy = sumv = 0.0; + for (i = 0; i < h; i++) { + line = data + i * wpl; + for (j = 0; j < w; j++) { + val = GET_DATA_BYTE(line, j); + sumx += val * j; + sumy += val * i; + sumv += val; + } + } + pixDestroy(&pix1); + + if (sumv == 0) { + L_INFO("input image is white\n", __func__); + *pcx = (l_float32)(w) / 2; + *pcy = (l_float32)(h) / 2; + } else { + *pcx = sumx / sumv; + *pcy = sumy / sumv; + } + + return 0; +} + + +/*! + * \brief pixDecideIfPhotoImage() + * + * \param[in] pix 8 bpp, centroid in center + * \param[in] factor subsampling for histograms; >= 1 + * \param[in] thresh threshold for photo/text; use 0 for default + * \param[in] n in range {1, ... 7}. n^2 is the maximum number + * of subregions for histograms; typ. n = 3. + * \param[out] pnaa array of normalized histograms + * \param[in] pixadebug [optional] use only for debug output + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) The input image must be 8 bpp (no colormap), and padded with + * white pixels so the centroid of photo-inverted pixels is at + * the center of the image. + * (2) The parameter %n specifies the "side" of the n x n grid + * of subimages. If the subimages have an aspect ratio larger + * than 2, the grid will change, using n^2 as a maximum for + * the number of subimages. For example, if n == 3, but the + * image is 600 x 200 pixels, a 3x3 grid would have subimages + * of 200 x 67 pixels, which is more than 2:1, so we change + * to a 4x2 grid where each subimage has 150 x 100 pixels. + * (3) If the pix is not almost certainly a photoimage, the returned + * histograms (%naa) are null. + * (4) If histograms are generated, the white (255) count is set + * to 0. This removes all pixels values above 230, including + * white padding from the centroid matching operation, from + * consideration. The resulting histograms are then normalized + * so the maximum count is 255. + * (5) Default for %thresh is 1.3; this seems sufficiently conservative. + * (6) Use %pixadebug == NULL unless debug output is requested. + * </pre> + */ +l_ok +pixDecideIfPhotoImage(PIX *pix, + l_int32 factor, + l_float32 thresh, + l_int32 n, + NUMAA **pnaa, + PIXA *pixadebug) +{ +char buf[64]; +l_int32 i, w, h, nx, ny, ngrids, istext, isphoto; +l_float32 maxval, sum1, sum2, ratio; +L_BMF *bmf; +NUMA *na1, *na2, *na3, *narv; +NUMAA *naa; +PIX *pix1; +PIXA *pixa1, *pixa2, *pixa3; + + if (!pnaa) + return ERROR_INT("&naa not defined", __func__, 1); + *pnaa = NULL; + if (!pix || pixGetDepth(pix) != 8 || pixGetColormap(pix)) + return ERROR_INT("pix undefined or invalid", __func__, 1); + if (n < 1 || n > 7) { + L_WARNING("n = %d is invalid; setting to 4\n", __func__, n); + n = 4; + } + if (thresh <= 0.0) thresh = 1.3f; /* default */ + + /* Look for text lines */ + pixDecideIfText(pix, NULL, &istext, pixadebug); + if (istext) { + L_INFO("Image is text\n", __func__); + return 0; + } + + /* Determine grid from n */ + pixGetDimensions(pix, &w, &h, NULL); + if (w == 0 || h == 0) + return ERROR_INT("invalid pix dimension", __func__, 1); + findHistoGridDimensions(n, w, h, &nx, &ny, 1); + + /* Evaluate histograms in each tile */ + pixa1 = pixaSplitPix(pix, nx, ny, 0, 0); + ngrids = nx * ny; + bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL; + naa = numaaCreate(ngrids); + if (pixadebug) { + lept_rmdir("lept/compplot"); + lept_mkdir("lept/compplot"); + } + for (i = 0; i < ngrids; i++) { + pix1 = pixaGetPix(pixa1, i, L_CLONE); + + /* Get histograms, set white count to 0, normalize max to 255 */ + na1 = pixGetGrayHistogram(pix1, factor); + numaSetValue(na1, 255, 0); + na2 = numaWindowedMean(na1, 5); /* do some smoothing */ + numaGetMax(na2, &maxval, NULL); + na3 = numaTransform(na2, 0, 255.0 / maxval); + if (pixadebug) { + snprintf(buf, sizeof(buf), "/tmp/lept/compplot/plot.%d", i); + gplotSimple1(na3, GPLOT_PNG, buf, "Histos"); + } + + numaaAddNuma(naa, na3, L_INSERT); + numaDestroy(&na1); + numaDestroy(&na2); + pixDestroy(&pix1); + } + if (pixadebug) { + pix1 = pixaDisplayTiledInColumns(pixa1, nx, 1.0, 30, 2); + pixaAddPix(pixadebug, pix1, L_INSERT); + pixa2 = pixaReadFiles("/tmp/lept/compplot", ".png"); + pixa3 = pixaScale(pixa2, 0.4f, 0.4f); + pix1 = pixaDisplayTiledInColumns(pixa3, nx, 1.0, 30, 2); + pixaAddPix(pixadebug, pix1, L_INSERT); + pixaDestroy(&pixa2); + pixaDestroy(&pixa3); + } + + /* Compute the standard deviation between these histos to decide + * if the image is photo or something more like line art, + * which does not support good comparison by tiled histograms. */ + grayInterHistogramStats(naa, 5, NULL, NULL, NULL, &narv); + + /* For photos, the root variance has a larger weight of + * values in the range [50 ... 150] compared to [200 ... 230], + * than text or line art. For the latter, most of the variance + * between tiles is in the lightest parts of the image, well + * above 150. */ + numaGetSumOnInterval(narv, 50, 150, &sum1); + numaGetSumOnInterval(narv, 200, 230, &sum2); + if (sum2 == 0.0) { /* shouldn't happen */ + ratio = 0.001f; /* anything very small for debug output */ + isphoto = 0; /* be conservative */ + } else { + ratio = sum1 / sum2; + isphoto = (ratio > thresh) ? 1 : 0; + } + if (pixadebug) { + if (isphoto) + L_INFO("ratio %f > %f; isphoto is true\n", + __func__, ratio, thresh); + else + L_INFO("ratio %f < %f; isphoto is false\n", + __func__, ratio, thresh); + } + if (isphoto) + *pnaa = naa; + else + numaaDestroy(&naa); + bmfDestroy(&bmf); + numaDestroy(&narv); + pixaDestroy(&pixa1); + return 0; +} + + +/*! + * \brief findHistoGridDimensions() + * + * \param[in] n max number of grid elements is n^2; typ. n = 3 + * \param[in] w width of image to be subdivided + * \param[in] h height of image to be subdivided + * \param[out] pnx number of grid elements in x direction + * \param[out] pny number of grid elements in y direction + * \param[in] debug 1 for debug output to stderr + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This determines the number of subdivisions to be used on + * the image in each direction. A histogram will be built + * for each subimage. + * (2) The parameter %n specifies the "side" of the n x n grid + * of subimages. If the subimages have an aspect ratio larger + * than 2, the grid will change, using n^2 as a maximum for + * the number of subimages. For example, if n == 3, but the + * image is 600 x 200 pixels, a 3x3 grid would have subimages + * of 200 x 67 pixels, which is more than 2:1, so we change + * to a 4x2 grid where each subimage has 150 x 100 pixels. + * </pre> + */ +static l_ok +findHistoGridDimensions(l_int32 n, + l_int32 w, + l_int32 h, + l_int32 *pnx, + l_int32 *pny, + l_int32 debug) +{ +l_int32 nx, ny, max; +l_float32 ratio; + + ratio = (l_float32)w / (l_float32)h; + max = n * n; + nx = ny = n; + while (nx > 1 && ny > 1) { + if (ratio > 2.0) { /* reduce ny */ + ny--; + nx = max / ny; + if (debug) + lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n", + nx, ny, ratio); + } else if (ratio < 0.5) { /* reduce nx */ + nx--; + ny = max / nx; + if (debug) + lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n", + nx, ny, ratio); + } else { /* we're ok */ + if (debug) + lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n", + nx, ny, ratio); + break; + } + ratio = (l_float32)(ny * w) / (l_float32)(nx * h); + } + *pnx = nx; + *pny = ny; + return 0; +} + + +/*! + * \brief compareTilesByHisto() + * + * \param[in] naa1, naa2 each is a set of 256 entry histograms + * \param[in] minratio requiring image sizes be compatible; < 1.0 + * \param[in] w1, h1, w2, h2 image sizes from which histograms were made + * \param[out] pscore similarity score of histograms + * \param[in] pixadebug [optional] use only for debug output + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) naa1 and naa2 must be generated using pixGenPhotoHistos(), + * using the same tile sizes. + * (2) The image dimensions must be similar. The score is 0.0 + * if the ratio of widths and heights (smallest / largest) + * exceeds a threshold %minratio, which must be between + * 0.5 and 1.0. If set at 1.0, both images must be exactly + * the same size. A typical value for %minratio is 0.9. + * (3) The input pixadebug is null unless debug output is requested. + * </pre> + */ +l_ok +compareTilesByHisto(NUMAA *naa1, + NUMAA *naa2, + l_float32 minratio, + l_int32 w1, + l_int32 h1, + l_int32 w2, + l_int32 h2, + l_float32 *pscore, + PIXA *pixadebug) +{ +char buf1[128], buf2[128]; +l_int32 i, n; +l_float32 wratio, hratio, score, minscore, dist; +L_BMF *bmf; +NUMA *na1, *na2, *nadist, *nascore; + + if (!pscore) + return ERROR_INT("&score not defined", __func__, 1); + *pscore = 0.0; + if (!naa1 || !naa2) + return ERROR_INT("naa1 and naa2 not both defined", __func__, 1); + + /* Filter for different sizes */ + wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 : + (l_float32)w2 / (l_float32)w1; + hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 : + (l_float32)h2 / (l_float32)h1; + if (wratio < minratio || hratio < minratio) { + if (pixadebug) + L_INFO("Sizes differ: wratio = %f, hratio = %f\n", + __func__, wratio, hratio); + return 0; + } + n = numaaGetCount(naa1); + if (n != numaaGetCount(naa2)) { /* due to differing w/h ratio */ + L_INFO("naa1 and naa2 sizes are different\n", __func__); + return 0; + } + + if (pixadebug) { + lept_rmdir("lept/comptile"); + lept_mkdir("lept/comptile"); + } + + + /* Evaluate histograms in each tile. Remove white before + * computing EMD, because there are may be a lot of white + * pixels due to padding, and we don't want to include them. + * This also makes the debug histo plots more informative. */ + minscore = 1.0; + nadist = numaCreate(n); + nascore = numaCreate(n); + bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL; + for (i = 0; i < n; i++) { + na1 = numaaGetNuma(naa1, i, L_CLONE); + na2 = numaaGetNuma(naa2, i, L_CLONE); + numaSetValue(na1, 255, 0.0); + numaSetValue(na2, 255, 0.0); + + /* To compare histograms, use the normalized earthmover distance. + * Further normalize to get the EM distance as a fraction of the + * maximum distance in the histogram (255). Finally, scale this + * up by 10.0, and subtract from 1.0 to get a similarity score. */ + numaEarthMoverDistance(na1, na2, &dist); + score = L_MAX(0.0, 1.0 - 10.0 * (dist / 255.)); + numaAddNumber(nadist, dist); + numaAddNumber(nascore, score); + minscore = L_MIN(minscore, score); + if (pixadebug) { + snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d", i); + gplotSimple2(na1, na2, GPLOT_PNG, buf1, "Histos"); + } + numaDestroy(&na1); + numaDestroy(&na2); + } + *pscore = minscore; + + if (pixadebug) { + for (i = 0; i < n; i++) { + PIX *pix1, *pix2; + snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d.png", i); + pix1 = pixRead(buf1); + numaGetFValue(nadist, i, &dist); + numaGetFValue(nascore, i, &score); + snprintf(buf2, sizeof(buf2), + "Image %d\ndist = %5.3f, score = %5.3f", i, dist, score); + pix2 = pixAddTextlines(pix1, bmf, buf2, 0x0000ff00, L_ADD_BELOW); + pixaAddPix(pixadebug, pix2, L_INSERT); + pixDestroy(&pix1); + } + lept_stderr("Writing to /tmp/lept/comptile/comparegray.pdf\n"); + pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL, + "/tmp/lept/comptile/comparegray.pdf"); + numaWriteDebug("/tmp/lept/comptile/scores.na", nascore); + numaWriteDebug("/tmp/lept/comptile/dists.na", nadist); + } + + bmfDestroy(&bmf); + numaDestroy(&nadist); + numaDestroy(&nascore); + return 0; +} + + +/*! + * \brief pixCompareGrayByHisto() + * + * \param[in] pix1, pix2 any depth; colormap OK + * \param[in] box1, box2 [optional] region selected from each; can be null + * \param[in] minratio requiring sizes be compatible; < 1.0 + * \param[in] maxgray max value to keep in histo; >= 200, 255 to keep all + * \param[in] factor subsampling factor; >= 1 + * \param[in] n in range {1, ... 7}. n^2 is the maximum number + * of subregions for histograms; typ. n = 3. + * \param[out] pscore similarity score of histograms + * \param[in] debugflag 1 for debug output; 0 for no debugging + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This function compares two grayscale photo regions. It can + * do it with a single histogram from each region, or with a + * set of spatially aligned histograms. For both cases, + * align the regions using the centroid of the inverse image, + * and crop to the smallest of the two. + * (2) The parameter %n specifies the "side" of an n x n grid + * of subimages. If the subimages have an aspect ratio larger + * than 2, the grid will change, using n^2 as a maximum for + * the number of subimages. For example, if n == 3, but the + * image is 600 x 200 pixels, a 3x3 grid would have subimages + * of 200 x 67 pixels, which is more than 2:1, so we change + * to a 4x2 grid where each subimage has 150 x 100 pixels. + * (3) An initial filter gives %score = 0 if the ratio of widths + * and heights (smallest / largest) does not exceed a + * threshold %minratio. This must be between 0.5 and 1.0. + * If set at 1.0, both images must be exactly the same size. + * A typical value for %minratio is 0.9. + * (4) The lightest values in the histogram can be disregarded. + * Set %maxgray to the lightest value to be kept. For example, + * to eliminate white (255), set %maxgray = 254. %maxgray must + * be >= 200. + * (5) For an efficient representation of the histogram, normalize + * using a multiplicative factor so that the number in the + * maximum bucket is 255. It then takes 256 bytes to store. + * (6) When comparing the histograms of two regions: + * ~ Use %maxgray = 254 to ignore the white pixels, the number + * of which may be sensitive to the crop region if the pixels + * outside that region are white. + * ~ Use the Earth Mover distance (EMD), with the histograms + * normalized so that the sum over bins is the same. + * Further normalize by dividing by 255, so that the result + * is in [0.0 ... 1.0]. + * (7) Get a similarity score S = 1.0 - k * D, where + * k is a constant, say in the range 5-10 + * D = normalized EMD + * and for multiple tiles, take the Min(S) to be the final score. + * Using aligned tiles gives protection against accidental + * similarity of the overall grayscale histograms. + * A small number of aligned tiles works well. + * (8) With debug on, you get a pdf that shows, for each tile, + * the images, histograms and score. + * (9) When to use: + * (a) Because this function should not be used on text or + * line graphics, which can give false positive results + * (i.e., high scores for different images), the input + * images should be filtered. + * (b) To filter, first use pixDecideIfText(). If that function + * says the image is text, do not use it. If the function + * says it is not text, it still may be line graphics, and + * in that case, use: + * pixGetGrayHistogramTiled() + * grayInterHistogramStats() + * to determine whether it is photo or line graphics. + * </pre> + */ +l_ok +pixCompareGrayByHisto(PIX *pix1, + PIX *pix2, + BOX *box1, + BOX *box2, + l_float32 minratio, + l_int32 maxgray, + l_int32 factor, + l_int32 n, + l_float32 *pscore, + l_int32 debugflag) +{ +l_int32 w1, h1, w2, h2; +l_float32 wratio, hratio; +BOX *box3, *box4; +PIX *pix3, *pix4, *pix5, *pix6, *pix7, *pix8; +PIXA *pixa; + + if (!pscore) + return ERROR_INT("&score not defined", __func__, 1); + *pscore = 0.0; + if (!pix1 || !pix2) + return ERROR_INT("pix1 and pix2 not both defined", __func__, 1); + if (minratio < 0.5 || minratio > 1.0) + return ERROR_INT("minratio not in [0.5 ... 1.0]", __func__, 1); + if (maxgray < 200) + return ERROR_INT("invalid maxgray; should be >= 200", __func__, 1); + maxgray = L_MIN(255, maxgray); + if (factor < 1) + return ERROR_INT("subsampling factor must be >= 1", __func__, 1); + if (n < 1 || n > 7) { + L_WARNING("n = %d is invalid; setting to 4\n", __func__, n); + n = 4; + } + + if (debugflag) + lept_mkdir("lept/comp"); + + /* Initial filter by size */ + if (box1) + boxGetGeometry(box1, NULL, NULL, &w1, &h1); + else + pixGetDimensions(pix1, &w1, &h1, NULL); + if (box2) + boxGetGeometry(box2, NULL, NULL, &w2, &h2); + else + pixGetDimensions(pix1, &w2, &h2, NULL); + wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 : + (l_float32)w2 / (l_float32)w1; + hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 : + (l_float32)h2 / (l_float32)h1; + if (wratio < minratio || hratio < minratio) + return 0; + + /* Initial crop, if necessary */ + if (box1) + pix3 = pixClipRectangle(pix1, box1, NULL); + else + pix3 = pixClone(pix1); + if (box2) + pix4 = pixClipRectangle(pix2, box2, NULL); + else + pix4 = pixClone(pix2); + + /* Convert to 8 bpp, align centroids and do maximal crop */ + pix5 = pixConvertTo8(pix3, FALSE); + pix6 = pixConvertTo8(pix4, FALSE); + pixCropAlignedToCentroid(pix5, pix6, factor, &box3, &box4); + pix7 = pixClipRectangle(pix5, box3, NULL); + pix8 = pixClipRectangle(pix6, box4, NULL); + pixa = (debugflag) ? pixaCreate(0) : NULL; + if (debugflag) { + PIX *pix9, *pix10, *pix11, *pix12, *pix13; + PIXA *pixa2; + pix9 = pixConvertTo32(pix5); + pix10 = pixConvertTo32(pix6); + pixRenderBoxArb(pix9, box3, 2, 255, 0, 0); + pixRenderBoxArb(pix10, box4, 2, 255, 0, 0); + pix11 = pixScaleToSize(pix9, 400, 0); + pix12 = pixScaleToSize(pix10, 400, 0); + pixa2 = pixaCreate(2); + pixaAddPix(pixa2, pix11, L_INSERT); + pixaAddPix(pixa2, pix12, L_INSERT); + pix13 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 0); + pixaAddPix(pixa, pix13, L_INSERT); + pixDestroy(&pix9); + pixDestroy(&pix10); + pixaDestroy(&pixa2); + } + pixDestroy(&pix3); + pixDestroy(&pix4); + pixDestroy(&pix5); + pixDestroy(&pix6); + boxDestroy(&box3); + boxDestroy(&box4); + + /* Tile and compare histograms */ + pixCompareTilesByHisto(pix7, pix8, maxgray, factor, n, pscore, pixa); + pixaDestroy(&pixa); + pixDestroy(&pix7); + pixDestroy(&pix8); + return 0; +} + + +/*! + * \brief pixCompareTilesByHisto() + * + * \param[in] pix1, pix2 8 bpp + * \param[in] maxgray max value to keep in histo; 255 to keep all + * \param[in] factor subsampling factor; >= 1 + * \param[in] n see pixCompareGrayByHisto() + * \param[out] pscore similarity score of histograms + * \param[in] pixadebug [optional] use only for debug output + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This static function is only called from pixCompareGrayByHisto(). + * The input images have been converted to 8 bpp if necessary, + * aligned and cropped. + * (2) The input pixadebug is null unless debug output is requested. + * (3) See pixCompareGrayByHisto() for details. + * </pre> + */ +static l_ok +pixCompareTilesByHisto(PIX *pix1, + PIX *pix2, + l_int32 maxgray, + l_int32 factor, + l_int32 n, + l_float32 *pscore, + PIXA *pixadebug) +{ +char buf[64]; +l_int32 w, h, i, j, nx, ny, ngr; +l_float32 score, minscore, maxval1, maxval2, dist; +L_BMF *bmf; +NUMA *na1, *na2, *na3, *na4, *na5, *na6, *na7; +PIX *pix3, *pix4; +PIXA *pixa1, *pixa2; + + if (!pscore) + return ERROR_INT("&score not defined", __func__, 1); + *pscore = 0.0; + if (!pix1 || !pix2) + return ERROR_INT("pix1 and pix2 not both defined", __func__, 1); + + /* Determine grid from n */ + pixGetDimensions(pix1, &w, &h, NULL); + findHistoGridDimensions(n, w, h, &nx, &ny, 1); + ngr = nx * ny; + + /* Evaluate histograms in each tile */ + pixa1 = pixaSplitPix(pix1, nx, ny, 0, 0); + pixa2 = pixaSplitPix(pix2, nx, ny, 0, 0); + na7 = (pixadebug) ? numaCreate(ngr) : NULL; + bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL; + minscore = 1.0; + for (i = 0; i < ngr; i++) { + pix3 = pixaGetPix(pixa1, i, L_CLONE); + pix4 = pixaGetPix(pixa2, i, L_CLONE); + + /* Get histograms, set white count to 0, normalize max to 255 */ + na1 = pixGetGrayHistogram(pix3, factor); + na2 = pixGetGrayHistogram(pix4, factor); + if (maxgray < 255) { + for (j = maxgray + 1; j <= 255; j++) { + numaSetValue(na1, j, 0); + numaSetValue(na2, j, 0); + } + } + na3 = numaWindowedMean(na1, 5); + na4 = numaWindowedMean(na2, 5); + numaGetMax(na3, &maxval1, NULL); + numaGetMax(na4, &maxval2, NULL); + na5 = numaTransform(na3, 0, 255.0 / maxval1); + na6 = numaTransform(na4, 0, 255.0 / maxval2); + if (pixadebug) { + gplotSimple2(na5, na6, GPLOT_PNG, "/tmp/lept/comp/plot1", "Histos"); + } + + /* To compare histograms, use the normalized earthmover distance. + * Further normalize to get the EM distance as a fraction of the + * maximum distance in the histogram (255). Finally, scale this + * up by 10.0, and subtract from 1.0 to get a similarity score. */ + numaEarthMoverDistance(na5, na6, &dist); + score = L_MAX(0.0, 1.0 - 8.0 * (dist / 255.)); + if (pixadebug) numaAddNumber(na7, score); + minscore = L_MIN(minscore, score); + if (pixadebug) { + PIX *pix5, *pix6, *pix7, *pix8, *pix9, *pix10; + PIXA *pixa3; + l_int32 w, h, wscale; + pixa3 = pixaCreate(3); + pixGetDimensions(pix3, &w, &h, NULL); + wscale = (w > h) ? 700 : 400; + pix5 = pixScaleToSize(pix3, wscale, 0); + pix6 = pixScaleToSize(pix4, wscale, 0); + pixaAddPix(pixa3, pix5, L_INSERT); + pixaAddPix(pixa3, pix6, L_INSERT); + pix7 = pixRead("/tmp/lept/comp/plot1.png"); + pix8 = pixScaleToSize(pix7, 700, 0); + snprintf(buf, sizeof(buf), "%5.3f", score); + pix9 = pixAddTextlines(pix8, bmf, buf, 0x0000ff00, L_ADD_RIGHT); + pixaAddPix(pixa3, pix9, L_INSERT); + pix10 = pixaDisplayTiledInRows(pixa3, 32, 1000, 1.0, 0, 50, 0); + pixaAddPix(pixadebug, pix10, L_INSERT); + pixDestroy(&pix7); + pixDestroy(&pix8); + pixaDestroy(&pixa3); + } + numaDestroy(&na1); + numaDestroy(&na2); + numaDestroy(&na3); + numaDestroy(&na4); + numaDestroy(&na5); + numaDestroy(&na6); + pixDestroy(&pix3); + pixDestroy(&pix4); + } + *pscore = minscore; + + if (pixadebug) { + pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL, + "/tmp/lept/comp/comparegray.pdf"); + numaWriteDebug("/tmp/lept/comp/tilescores.na", na7); + } + + bmfDestroy(&bmf); + numaDestroy(&na7); + pixaDestroy(&pixa1); + pixaDestroy(&pixa2); + return 0; +} + + +/*! + * \brief pixCropAlignedToCentroid() + * + * \param[in] pix1, pix2 any depth; colormap OK + * \param[in] factor subsampling; >= 1 + * \param[out] pbox1 crop box for pix1 + * \param[out] pbox2 crop box for pix2 + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This finds the maximum crop boxes for two 8 bpp images when + * their centroids of their photometric inverses are aligned. + * Black pixels have weight 255; white pixels have weight 0. + * </pre> + */ +l_ok +pixCropAlignedToCentroid(PIX *pix1, + PIX *pix2, + l_int32 factor, + BOX **pbox1, + BOX **pbox2) +{ +l_float32 cx1, cy1, cx2, cy2; +l_int32 w1, h1, w2, h2, icx1, icy1, icx2, icy2; +l_int32 xm, xm1, xm2, xp, xp1, xp2, ym, ym1, ym2, yp, yp1, yp2; +PIX *pix3, *pix4; + + if (pbox1) *pbox1 = NULL; + if (pbox2) *pbox2 = NULL; + if (!pix1 || !pix2) + return ERROR_INT("pix1 and pix2 not both defined", __func__, 1); + if (factor < 1) + return ERROR_INT("subsampling factor must be >= 1", __func__, 1); + if (!pbox1 || !pbox2) + return ERROR_INT("&box1 and &box2 not both defined", __func__, 1); + + pix3 = pixConvertTo8(pix1, FALSE); + pix4 = pixConvertTo8(pix2, FALSE); + pixCentroid8(pix3, factor, &cx1, &cy1); + pixCentroid8(pix4, factor, &cx2, &cy2); + pixGetDimensions(pix3, &w1, &h1, NULL); + pixGetDimensions(pix4, &w2, &h2, NULL); + pixDestroy(&pix3); + pixDestroy(&pix4); + + icx1 = (l_int32)(cx1 + 0.5); + icy1 = (l_int32)(cy1 + 0.5); + icx2 = (l_int32)(cx2 + 0.5); + icy2 = (l_int32)(cy2 + 0.5); + xm = L_MIN(icx1, icx2); + xm1 = icx1 - xm; + xm2 = icx2 - xm; + xp = L_MIN(w1 - icx1, w2 - icx2); /* one pixel beyond to the right */ + xp1 = icx1 + xp; + xp2 = icx2 + xp; + ym = L_MIN(icy1, icy2); + ym1 = icy1 - ym; + ym2 = icy2 - ym; + yp = L_MIN(h1 - icy1, h2 - icy2); /* one pixel below the bottom */ + yp1 = icy1 + yp; + yp2 = icy2 + yp; + *pbox1 = boxCreate(xm1, ym1, xp1 - xm1, yp1 - ym1); + *pbox2 = boxCreate(xm2, ym2, xp2 - xm2, yp2 - ym2); + return 0; +} + + +/*! + * \brief l_compressGrayHistograms() + * + * \param[in] naa set of 256-entry histograms + * \param[in] w, h size of image + * \param[out] psize size of byte array + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This first writes w and h to the byte array as 4 byte ints. + * (2) Then it normalizes each histogram to a max value of 255, + * and saves each value as a byte. If there are + * N histograms, the output bytearray has 8 + 256 * N bytes. + * (3) Further compression of the array with zlib yields only about + * a 25% decrease in size, so we don't bother. If size reduction + * were important, a lossy transform using a 1-dimensional DCT + * would be effective, because we don't care about the fine + * details of these histograms. + * </pre> + */ +l_uint8 * +l_compressGrayHistograms(NUMAA *naa, + l_int32 w, + l_int32 h, + size_t *psize) +{ +l_uint8 *bytea; +l_int32 i, j, n, nn, ival; +l_float32 maxval; +NUMA *na1, *na2; + + if (!psize) + return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL); + *psize = 0; + if (!naa) + return (l_uint8 *)ERROR_PTR("naa not defined", __func__, NULL); + n = numaaGetCount(naa); + for (i = 0; i < n; i++) { + nn = numaaGetNumaCount(naa, i); + if (nn != 256) { + L_ERROR("%d numbers in numa[%d]\n", __func__, nn, i); + return NULL; + } + } + + if ((bytea = (l_uint8 *)LEPT_CALLOC(8 + 256 * n, sizeof(l_uint8))) == NULL) + return (l_uint8 *)ERROR_PTR("bytea not made", __func__, NULL); + *psize = 8 + 256 * n; + l_setDataFourBytes(bytea, 0, w); + l_setDataFourBytes(bytea, 1, h); + for (i = 0; i < n; i++) { + na1 = numaaGetNuma(naa, i, L_COPY); + numaGetMax(na1, &maxval, NULL); + na2 = numaTransform(na1, 0, 255.0 / maxval); + for (j = 0; j < 256; j++) { + numaGetIValue(na2, j, &ival); + bytea[8 + 256 * i + j] = ival; + } + numaDestroy(&na1); + numaDestroy(&na2); + } + + return bytea; +} + + +/*! + * \brief l_uncompressGrayHistograms() + * + * \param[in] bytea byte array of size 8 + 256 * N, N an integer + * \param[in] size size of byte array + * \param[out] pw width of the image that generated the histograms + * \param[out] ph height of the image + * \return numaa representing N histograms, each with 256 bins, + * or NULL on error. + * + * <pre> + * Notes: + * (1) The first 8 bytes are read as two 32-bit ints. + * (2) Then this constructs a numaa representing some number of + * gray histograms that are normalized such that the max value + * in each histogram is 255. The data is stored as a byte + * array, with 256 bytes holding the data for each histogram. + * Each gray histogram was computed from a tile of a grayscale image. + * </pre> + */ +NUMAA * +l_uncompressGrayHistograms(l_uint8 *bytea, + size_t size, + l_int32 *pw, + l_int32 *ph) +{ +l_int32 i, j, n; +NUMA *na; +NUMAA *naa; + + if (pw) *pw = 0; + if (ph) *ph = 0; + if (!pw || !ph) + return (NUMAA *)ERROR_PTR("&w and &h not both defined", __func__, NULL); + if (!bytea) + return (NUMAA *)ERROR_PTR("bytea not defined", __func__, NULL); + n = (size - 8) / 256; + if ((size - 8) % 256 != 0) + return (NUMAA *)ERROR_PTR("bytea size is invalid", __func__, NULL); + + *pw = l_getDataFourBytes(bytea, 0); + *ph = l_getDataFourBytes(bytea, 1); + naa = numaaCreate(n); + for (i = 0; i < n; i++) { + na = numaCreate(256); + for (j = 0; j < 256; j++) + numaAddNumber(na, bytea[8 + 256 * i + j]); + numaaAddNuma(naa, na, L_INSERT); + } + + return naa; +} + + +/*------------------------------------------------------------------* + * Translated images at the same resolution * + *------------------------------------------------------------------*/ +/*! + * \brief pixCompareWithTranslation() + * + * \param[in] pix1, pix2 any depth; colormap OK + * \param[in] thresh threshold for converting to 1 bpp + * \param[out] pdelx x translation on pix2 to align with pix1 + * \param[out] pdely y translation on pix2 to align with pix1 + * \param[out] pscore correlation score at best alignment + * \param[in] debugflag 1 for debug output; 0 for no debugging + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This does a coarse-to-fine search for best translational + * alignment of two images, measured by a scoring function + * that is the correlation between the fg pixels. + * (2) The threshold is used if the images aren't 1 bpp. + * (3) With debug on, you get a pdf that shows, as a grayscale + * image, the score as a function of shift from the initial + * estimate, for each of the four levels. The shift is 0 at + * the center of the image. + * (4) With debug on, you also get a pdf that shows the + * difference at the best alignment between the two images, + * at each of the four levels. The red and green pixels + * show locations where one image has a fg pixel and the + * other doesn't. The black pixels are where both images + * have fg pixels, and white pixels are where neither image + * has fg pixels. + * </pre> + */ +l_ok +pixCompareWithTranslation(PIX *pix1, + PIX *pix2, + l_int32 thresh, + l_int32 *pdelx, + l_int32 *pdely, + l_float32 *pscore, + l_int32 debugflag) +{ +l_uint8 *subtab; +l_int32 i, level, area1, area2, delx, dely; +l_int32 etransx, etransy, maxshift, dbint; +l_int32 *stab, *ctab; +l_float32 cx1, cx2, cy1, cy2, score; +PIX *pixb1, *pixb2, *pixt1, *pixt2, *pixt3, *pixt4; +PIXA *pixa1, *pixa2, *pixadb = NULL; + + if (pdelx) *pdelx = 0; + if (pdely) *pdely = 0; + if (pscore) *pscore = 0.0; + if (!pdelx || !pdely) + return ERROR_INT("&delx and &dely not defined", __func__, 1); + if (!pscore) + return ERROR_INT("&score not defined", __func__, 1); + if (!pix1) + return ERROR_INT("pix1 not defined", __func__, 1); + if (!pix2) + return ERROR_INT("pix2 not defined", __func__, 1); + + /* Make tables */ + subtab = makeSubsampleTab2x(); + stab = makePixelSumTab8(); + ctab = makePixelCentroidTab8(); + + /* Binarize each image */ + pixb1 = pixConvertTo1(pix1, thresh); + pixb2 = pixConvertTo1(pix2, thresh); + + /* Make a cascade of 2x reduced images for each, thresholding + * with level 2 (neutral), down to 8x reduction */ + pixa1 = pixaCreate(4); + pixa2 = pixaCreate(4); + if (debugflag) + pixadb = pixaCreate(4); + pixaAddPix(pixa1, pixb1, L_INSERT); + pixaAddPix(pixa2, pixb2, L_INSERT); + for (i = 0; i < 3; i++) { + pixt1 = pixReduceRankBinary2(pixb1, 2, subtab); + pixt2 = pixReduceRankBinary2(pixb2, 2, subtab); + pixaAddPix(pixa1, pixt1, L_INSERT); + pixaAddPix(pixa2, pixt2, L_INSERT); + pixb1 = pixt1; + pixb2 = pixt2; + } + + /* At the lowest level, use the centroids with a maxshift of 6 + * to search for the best alignment. Then at higher levels, + * use the result from the level below as the initial approximation + * for the alignment, and search with a maxshift of 2. */ + for (level = 3; level >= 0; level--) { + pixt1 = pixaGetPix(pixa1, level, L_CLONE); + pixt2 = pixaGetPix(pixa2, level, L_CLONE); + pixCountPixels(pixt1, &area1, stab); + pixCountPixels(pixt2, &area2, stab); + if (level == 3) { + pixCentroid(pixt1, ctab, stab, &cx1, &cy1); + pixCentroid(pixt2, ctab, stab, &cx2, &cy2); + etransx = lept_roundftoi(cx1 - cx2); + etransy = lept_roundftoi(cy1 - cy2); + maxshift = 6; + } else { + etransx = 2 * delx; + etransy = 2 * dely; + maxshift = 2; + } + dbint = (debugflag) ? level + 1 : 0; + pixBestCorrelation(pixt1, pixt2, area1, area2, etransx, etransy, + maxshift, stab, &delx, &dely, &score, dbint); + if (debugflag) { + lept_stderr("Level %d: delx = %d, dely = %d, score = %7.4f\n", + level, delx, dely, score); + pixRasteropIP(pixt2, delx, dely, L_BRING_IN_WHITE); + pixt3 = pixDisplayDiffBinary(pixt1, pixt2); + pixt4 = pixExpandReplicate(pixt3, 8 / (1 << (3 - level))); + pixaAddPix(pixadb, pixt4, L_INSERT); + pixDestroy(&pixt3); + } + pixDestroy(&pixt1); + pixDestroy(&pixt2); + } + + if (debugflag) { + pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, NULL, + "/tmp/lept/comp/compare.pdf"); + convertFilesToPdf("/tmp/lept/comp", "correl_", 30, 1.0, L_FLATE_ENCODE, + 0, "Correlation scores at levels 1 through 5", + "/tmp/lept/comp/correl.pdf"); + pixaDestroy(&pixadb); + } + + *pdelx = delx; + *pdely = dely; + *pscore = score; + pixaDestroy(&pixa1); + pixaDestroy(&pixa2); + LEPT_FREE(subtab); + LEPT_FREE(stab); + LEPT_FREE(ctab); + return 0; +} + + +/*! + * \brief pixBestCorrelation() + * + * \param[in] pix1 1 bpp + * \param[in] pix2 1 bpp + * \param[in] area1 number of on pixels in pix1 + * \param[in] area2 number of on pixels in pix2 + * \param[in] etransx estimated x translation of pix2 to align with pix1 + * \param[in] etransy estimated y translation of pix2 to align with pix1 + * \param[in] maxshift max x and y shift of pix2, around the estimated + * alignment location, relative to pix1 + * \param[in] tab8 [optional] sum tab for ON pixels in byte; can be NULL + * \param[out] pdelx [optional] best x shift of pix2 relative to pix1 + * \param[out] pdely [optional] best y shift of pix2 relative to pix1 + * \param[out] pscore [optional] maximum score found; can be NULL + * \param[in] debugflag <= 0 to skip; positive to generate output. + * The integer is used to label the debug image. + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This maximizes the correlation score between two 1 bpp images, + * by starting with an estimate of the alignment + * (%etransx, %etransy) and computing the correlation around this. + * It optionally returns the shift (%delx, %dely) that maximizes + * the correlation score when pix2 is shifted by this amount + * relative to pix1. + * (2) Get the centroids of pix1 and pix2, using pixCentroid(), + * to compute (%etransx, %etransy). Get the areas using + * pixCountPixels(). + * (3) The centroid of pix2 is shifted with respect to the centroid + * of pix1 by all values between -maxshiftx and maxshiftx, + * and likewise for the y shifts. Therefore, the number of + * correlations computed is: + * (2 * maxshiftx + 1) * (2 * maxshifty + 1) + * Consequently, if pix1 and pix2 are large, you should do this + * in a coarse-to-fine sequence. See the use of this function + * in pixCompareWithTranslation(). + * </pre> + */ +l_ok +pixBestCorrelation(PIX *pix1, + PIX *pix2, + l_int32 area1, + l_int32 area2, + l_int32 etransx, + l_int32 etransy, + l_int32 maxshift, + l_int32 *tab8, + l_int32 *pdelx, + l_int32 *pdely, + l_float32 *pscore, + l_int32 debugflag) +{ +l_int32 shiftx, shifty, delx, dely; +l_int32 *tab; +l_float32 maxscore, score; +FPIX *fpix = NULL; +PIX *pix3, *pix4; + + if (pdelx) *pdelx = 0; + if (pdely) *pdely = 0; + if (pscore) *pscore = 0.0; + if (!pix1 || pixGetDepth(pix1) != 1) + return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1); + if (!pix2 || pixGetDepth(pix2) != 1) + return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1); + if (!area1 || !area2) + return ERROR_INT("areas must be > 0", __func__, 1); + + if (debugflag > 0) + fpix = fpixCreate(2 * maxshift + 1, 2 * maxshift + 1); + + if (!tab8) + tab = makePixelSumTab8(); + else + tab = tab8; + + /* Search over a set of {shiftx, shifty} for the max */ + maxscore = 0; + delx = etransx; + dely = etransy; + for (shifty = -maxshift; shifty <= maxshift; shifty++) { + for (shiftx = -maxshift; shiftx <= maxshift; shiftx++) { + pixCorrelationScoreShifted(pix1, pix2, area1, area2, + etransx + shiftx, + etransy + shifty, tab, &score); + if (debugflag > 0) { + fpixSetPixel(fpix, maxshift + shiftx, maxshift + shifty, + 1000.0 * score); +/* lept_stderr("(sx, sy) = (%d, %d): score = %6.4f\n", + shiftx, shifty, score); */ + } + if (score > maxscore) { + maxscore = score; + delx = etransx + shiftx; + dely = etransy + shifty; + } + } + } + + if (debugflag > 0) { + char buf[128]; + lept_mkdir("lept/comp"); + pix3 = fpixDisplayMaxDynamicRange(fpix); + pix4 = pixExpandReplicate(pix3, 20); + snprintf(buf, sizeof(buf), "/tmp/lept/comp/correl_%d.png", + debugflag); + pixWrite(buf, pix4, IFF_PNG); + pixDestroy(&pix3); + pixDestroy(&pix4); + fpixDestroy(&fpix); + } + + if (pdelx) *pdelx = delx; + if (pdely) *pdely = dely; + if (pscore) *pscore = maxscore; + if (!tab8) LEPT_FREE(tab); + return 0; +}
