Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/api/renderer.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: renderer.cpp | |
| 3 // Description: Rendering interface to inject into TessBaseAPI | |
| 4 // | |
| 5 // (C) Copyright 2011, Google Inc. | |
| 6 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 7 // you may not use this file except in compliance with the License. | |
| 8 // You may obtain a copy of the License at | |
| 9 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 // Unless required by applicable law or agreed to in writing, software | |
| 11 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 // See the License for the specific language governing permissions and | |
| 14 // limitations under the License. | |
| 15 // | |
| 16 /////////////////////////////////////////////////////////////////////// | |
| 17 | |
| 18 #ifdef HAVE_CONFIG_H | |
| 19 # include "config_auto.h" | |
| 20 #endif | |
| 21 #include <tesseract/baseapi.h> | |
| 22 #include <tesseract/renderer.h> | |
| 23 #include <cstring> | |
| 24 #include <memory> // std::unique_ptr | |
| 25 #include <string> // std::string | |
| 26 #include "serialis.h" // Serialize | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 /********************************************************************** | |
| 31 * Base Renderer interface implementation | |
| 32 **********************************************************************/ | |
| 33 TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension) | |
| 34 : next_(nullptr) | |
| 35 , fout_(stdout) | |
| 36 , file_extension_(extension) | |
| 37 , title_("") | |
| 38 , imagenum_(-1) | |
| 39 , happy_(true) { | |
| 40 if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { | |
| 41 std::string outfile = std::string(outputbase) + "." + extension; | |
| 42 fout_ = fopen(outfile.c_str(), "wb"); | |
| 43 if (fout_ == nullptr) { | |
| 44 happy_ = false; | |
| 45 } | |
| 46 } | |
| 47 } | |
| 48 | |
| 49 TessResultRenderer::~TessResultRenderer() { | |
| 50 if (fout_ != nullptr) { | |
| 51 if (fout_ != stdout) { | |
| 52 fclose(fout_); | |
| 53 } else { | |
| 54 clearerr(fout_); | |
| 55 } | |
| 56 } | |
| 57 delete next_; | |
| 58 } | |
| 59 | |
| 60 void TessResultRenderer::insert(TessResultRenderer *next) { | |
| 61 if (next == nullptr) { | |
| 62 return; | |
| 63 } | |
| 64 | |
| 65 TessResultRenderer *remainder = next_; | |
| 66 next_ = next; | |
| 67 if (remainder) { | |
| 68 while (next->next_ != nullptr) { | |
| 69 next = next->next_; | |
| 70 } | |
| 71 next->next_ = remainder; | |
| 72 } | |
| 73 } | |
| 74 | |
| 75 bool TessResultRenderer::BeginDocument(const char *title) { | |
| 76 if (!happy_) { | |
| 77 return false; | |
| 78 } | |
| 79 title_ = title; | |
| 80 imagenum_ = -1; | |
| 81 bool ok = BeginDocumentHandler(); | |
| 82 if (next_) { | |
| 83 ok = next_->BeginDocument(title) && ok; | |
| 84 } | |
| 85 return ok; | |
| 86 } | |
| 87 | |
| 88 bool TessResultRenderer::AddImage(TessBaseAPI *api) { | |
| 89 if (!happy_) { | |
| 90 return false; | |
| 91 } | |
| 92 ++imagenum_; | |
| 93 bool ok = AddImageHandler(api); | |
| 94 if (next_) { | |
| 95 ok = next_->AddImage(api) && ok; | |
| 96 } | |
| 97 return ok; | |
| 98 } | |
| 99 | |
| 100 bool TessResultRenderer::EndDocument() { | |
| 101 if (!happy_) { | |
| 102 return false; | |
| 103 } | |
| 104 bool ok = EndDocumentHandler(); | |
| 105 if (next_) { | |
| 106 ok = next_->EndDocument() && ok; | |
| 107 } | |
| 108 return ok; | |
| 109 } | |
| 110 | |
| 111 void TessResultRenderer::AppendString(const char *s) { | |
| 112 if (s == nullptr) { | |
| 113 return; | |
| 114 } | |
| 115 AppendData(s, strlen(s)); | |
| 116 } | |
| 117 | |
| 118 void TessResultRenderer::AppendData(const char *s, int len) { | |
| 119 if (!tesseract::Serialize(fout_, s, len)) { | |
| 120 happy_ = false; | |
| 121 } | |
| 122 fflush(fout_); | |
| 123 } | |
| 124 | |
| 125 bool TessResultRenderer::BeginDocumentHandler() { | |
| 126 return happy_; | |
| 127 } | |
| 128 | |
| 129 bool TessResultRenderer::EndDocumentHandler() { | |
| 130 return happy_; | |
| 131 } | |
| 132 | |
| 133 /********************************************************************** | |
| 134 * UTF8 Text Renderer interface implementation | |
| 135 **********************************************************************/ | |
| 136 TessTextRenderer::TessTextRenderer(const char *outputbase) | |
| 137 : TessResultRenderer(outputbase, "txt") {} | |
| 138 | |
| 139 bool TessTextRenderer::AddImageHandler(TessBaseAPI *api) { | |
| 140 const std::unique_ptr<const char[]> utf8(api->GetUTF8Text()); | |
| 141 if (utf8 == nullptr) { | |
| 142 return false; | |
| 143 } | |
| 144 | |
| 145 const char *pageSeparator = api->GetStringVariable("page_separator"); | |
| 146 if (pageSeparator != nullptr && *pageSeparator != '\0' && imagenum() > 0) { | |
| 147 AppendString(pageSeparator); | |
| 148 } | |
| 149 | |
| 150 AppendString(utf8.get()); | |
| 151 | |
| 152 return true; | |
| 153 } | |
| 154 | |
| 155 /********************************************************************** | |
| 156 * TSV Text Renderer interface implementation | |
| 157 **********************************************************************/ | |
| 158 TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") { | |
| 159 font_info_ = false; | |
| 160 } | |
| 161 | |
| 162 TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info) | |
| 163 : TessResultRenderer(outputbase, "tsv") { | |
| 164 font_info_ = font_info; | |
| 165 } | |
| 166 | |
| 167 bool TessTsvRenderer::BeginDocumentHandler() { | |
| 168 // Output TSV column headings | |
| 169 AppendString( | |
| 170 "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" | |
| 171 "num\tleft\ttop\twidth\theight\tconf\ttext\n"); | |
| 172 return true; | |
| 173 } | |
| 174 | |
| 175 bool TessTsvRenderer::EndDocumentHandler() { | |
| 176 return true; | |
| 177 } | |
| 178 | |
| 179 bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) { | |
| 180 const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum())); | |
| 181 if (tsv == nullptr) { | |
| 182 return false; | |
| 183 } | |
| 184 | |
| 185 AppendString(tsv.get()); | |
| 186 | |
| 187 return true; | |
| 188 } | |
| 189 | |
| 190 /********************************************************************** | |
| 191 * UNLV Text Renderer interface implementation | |
| 192 **********************************************************************/ | |
| 193 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) | |
| 194 : TessResultRenderer(outputbase, "unlv") {} | |
| 195 | |
| 196 bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) { | |
| 197 const std::unique_ptr<const char[]> unlv(api->GetUNLVText()); | |
| 198 if (unlv == nullptr) { | |
| 199 return false; | |
| 200 } | |
| 201 | |
| 202 AppendString(unlv.get()); | |
| 203 | |
| 204 return true; | |
| 205 } | |
| 206 | |
| 207 /********************************************************************** | |
| 208 * BoxText Renderer interface implementation | |
| 209 **********************************************************************/ | |
| 210 TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) | |
| 211 : TessResultRenderer(outputbase, "box") {} | |
| 212 | |
| 213 bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) { | |
| 214 const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum())); | |
| 215 if (text == nullptr) { | |
| 216 return false; | |
| 217 } | |
| 218 | |
| 219 AppendString(text.get()); | |
| 220 | |
| 221 return true; | |
| 222 } | |
| 223 | |
| 224 #ifndef DISABLED_LEGACY_ENGINE | |
| 225 | |
| 226 /********************************************************************** | |
| 227 * Osd Text Renderer interface implementation | |
| 228 **********************************************************************/ | |
| 229 TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {} | |
| 230 | |
| 231 bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) { | |
| 232 const std::unique_ptr<const char[]> osd(api->GetOsdText(imagenum())); | |
| 233 if (osd == nullptr) { | |
| 234 return false; | |
| 235 } | |
| 236 | |
| 237 AppendString(osd.get()); | |
| 238 | |
| 239 return true; | |
| 240 } | |
| 241 | |
| 242 #endif // ndef DISABLED_LEGACY_ENGINE | |
| 243 | |
| 244 } // namespace tesseract |
