Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/html/epub-doc.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "html-imp.h" | |
| 25 | |
| 26 #include <string.h> | |
| 27 #include <math.h> | |
| 28 | |
| 29 #include <zlib.h> /* for crc32 */ | |
| 30 | |
/* Indices into four-sided arrays such as page_margin[]: top, right, bottom, left. */
enum { T = 0, R = 1, B = 2, L = 3 };

/* Forward declarations; the structs are defined further down. */
typedef struct epub_page epub_page;
typedef struct epub_chapter epub_chapter;
| 35 | |
/*
 * Cached per-chapter page counts, together with the layout settings
 * they were computed for.
 */
typedef struct
{
	/* Number of slots allocated in pages_in_chapter. */
	int max_chapters;
	/* Number of leading slots that are considered in use. */
	int num_chapters;
	/* Layout width the cached counts belong to. */
	float layout_w;
	/* Layout height the cached counts belong to. */
	float layout_h;
	/* Layout em size the cached counts belong to. */
	float layout_em;
	/* Checksum of the user CSS the cached counts belong to. */
	uint32_t css_sum;
	/* Whether document CSS was enabled for the cached counts. */
	int use_doc_css;
	/* Page count per chapter; -1 marks an unknown entry. */
	int *pages_in_chapter;
} epub_accelerator;
| 47 | |
| 48 typedef struct | |
| 49 { | |
| 50 fz_document super; | |
| 51 fz_archive *zip; | |
| 52 fz_html_font_set *set; | |
| 53 int count; | |
| 54 epub_chapter *spine; | |
| 55 fz_outline *outline; | |
| 56 char *dc_title, *dc_creator; | |
| 57 float layout_w, layout_h, layout_em; | |
| 58 epub_accelerator *accel; | |
| 59 uint32_t css_sum; | |
| 60 | |
| 61 /* A common pattern of use is for us to open a document, | |
| 62 * load a page, draw it, drop it, load the next page, | |
| 63 * draw it, drop it etc. This means that the HTML for | |
| 64 * a chapter might get thrown away between the drop and | |
| 65 * the the next load (if the chapter is large, and the | |
| 66 * store size is low). Accordingly, we store a handle | |
| 67 * to the most recently used html block here, thus | |
| 68 * ensuring that the stored copy won't be evicted. */ | |
| 69 fz_html *most_recent_html; | |
| 70 } epub_document; | |
| 71 | |
| 72 struct epub_chapter | |
| 73 { | |
| 74 epub_document *doc; | |
| 75 char *path; | |
| 76 int number; | |
| 77 epub_chapter *next; | |
| 78 }; | |
| 79 | |
| 80 struct epub_page | |
| 81 { | |
| 82 fz_page super; | |
| 83 epub_chapter *ch; | |
| 84 int number; | |
| 85 fz_html *html; | |
| 86 }; | |
| 87 | |
| 88 static uint32_t | |
| 89 user_css_sum(fz_context *ctx) | |
| 90 { | |
| 91 uint32_t sum = 0; | |
| 92 const char *css = fz_user_css(ctx); | |
| 93 sum = crc32(0, NULL, 0); | |
| 94 if (css) | |
| 95 sum = crc32(sum, (Byte*)css, (int)strlen(css)); | |
| 96 return sum; | |
| 97 } | |
| 98 | |
/* Placeholder whose address is stored as the value for every key in the
 * encrypted-entry tree; only the presence of a key is ever tested. */
static int dummy = 1;
| 100 | |
| 101 struct encrypted { | |
| 102 fz_archive super; | |
| 103 fz_archive *chain; | |
| 104 fz_tree *info; | |
| 105 }; | |
| 106 | |
| 107 static int has_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name) | |
| 108 { | |
| 109 struct encrypted *arch = (struct encrypted *)arch_; | |
| 110 return fz_has_archive_entry(ctx, arch->chain, name); | |
| 111 } | |
| 112 | |
| 113 static fz_stream *open_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name) | |
| 114 { | |
| 115 struct encrypted *arch = (struct encrypted *)arch_; | |
| 116 if (fz_tree_lookup(ctx, arch->info, name)) | |
| 117 return NULL; | |
| 118 return fz_open_archive_entry(ctx, arch->chain, name); | |
| 119 } | |
| 120 | |
| 121 static fz_buffer *read_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name) | |
| 122 { | |
| 123 struct encrypted *arch = (struct encrypted *)arch_; | |
| 124 if (fz_tree_lookup(ctx, arch->info, name)) | |
| 125 return NULL; | |
| 126 return fz_read_archive_entry(ctx, arch->chain, name); | |
| 127 } | |
| 128 | |
| 129 static void drop_encrypted_archive(fz_context *ctx, fz_archive *arch_) | |
| 130 { | |
| 131 struct encrypted *arch = (struct encrypted *)arch_; | |
| 132 fz_drop_tree(ctx, arch->info, NULL); | |
| 133 fz_drop_archive(ctx, arch->chain); | |
| 134 } | |
| 135 | |
| 136 static fz_archive *new_encrypted_archive(fz_context *ctx, fz_archive *chain, fz_tree *info) | |
| 137 { | |
| 138 struct encrypted *arch; | |
| 139 | |
| 140 arch = fz_new_derived_archive(ctx, NULL, struct encrypted); | |
| 141 arch->super.format = "encrypted"; | |
| 142 arch->super.has_entry = has_encrypted_entry; | |
| 143 arch->super.read_entry = read_encrypted_entry; | |
| 144 arch->super.open_entry = open_encrypted_entry; | |
| 145 arch->super.drop_archive = drop_encrypted_archive; | |
| 146 arch->chain = chain; | |
| 147 arch->info = info; | |
| 148 | |
| 149 return &arch->super; | |
| 150 } | |
| 151 | |
| 152 static void | |
| 153 epub_parse_encryption(fz_context *ctx, epub_document *doc, fz_xml *root) | |
| 154 { | |
| 155 fz_tree *info = NULL; | |
| 156 fz_xml *edata; | |
| 157 | |
| 158 for (edata = fz_xml_find_down(root, "EncryptedData"); edata; edata = fz_xml_find_next(edata, "EncryptedData")) | |
| 159 { | |
| 160 fz_xml *cdata = fz_xml_find_down(edata, "CipherData"); | |
| 161 fz_xml *cref = fz_xml_find_down(cdata, "CipherReference"); | |
| 162 char *uri = fz_xml_att(cref, "URI"); | |
| 163 if (uri) | |
| 164 { | |
| 165 // TODO: Support reading EncryptedKey and EncryptionMethod to decrypt content. | |
| 166 info = fz_tree_insert(ctx, info, uri, &dummy); | |
| 167 } | |
| 168 } | |
| 169 | |
| 170 if (info) | |
| 171 { | |
| 172 doc->zip = new_encrypted_archive(ctx, doc->zip, info); | |
| 173 } | |
| 174 } | |
| 175 | |
| 176 static fz_html *epub_get_laid_out_html(fz_context *ctx, epub_document *doc, epub_chapter *ch); | |
| 177 | |
| 178 static int count_laid_out_pages(fz_html *html) | |
| 179 { | |
| 180 if (html->tree.root->s.layout.b > 0) | |
| 181 return ceilf(html->tree.root->s.layout.b / html->page_h); | |
| 182 return 1; | |
| 183 } | |
| 184 | |
| 185 static void | |
| 186 invalidate_accelerator(fz_context *ctx, epub_accelerator *acc) | |
| 187 { | |
| 188 int i; | |
| 189 | |
| 190 for (i = 0; i < acc->max_chapters; i++) | |
| 191 acc->pages_in_chapter[i] = -1; | |
| 192 } | |
| 193 | |
| 194 static int count_chapter_pages(fz_context *ctx, epub_document *doc, epub_chapter *ch) | |
| 195 { | |
| 196 epub_accelerator *acc = doc->accel; | |
| 197 int use_doc_css = fz_use_document_css(ctx); | |
| 198 | |
| 199 if (use_doc_css != acc->use_doc_css || doc->css_sum != acc->css_sum) | |
| 200 { | |
| 201 acc->use_doc_css = use_doc_css; | |
| 202 acc->css_sum = doc->css_sum; | |
| 203 invalidate_accelerator(ctx, acc); | |
| 204 } | |
| 205 | |
| 206 if (ch->number < acc->num_chapters && acc->pages_in_chapter[ch->number] != -1) | |
| 207 return acc->pages_in_chapter[ch->number]; | |
| 208 | |
| 209 fz_drop_html(ctx, epub_get_laid_out_html(ctx, doc, ch)); | |
| 210 return acc->pages_in_chapter[ch->number]; | |
| 211 } | |
| 212 | |
| 213 static fz_link_dest | |
| 214 epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest) | |
| 215 { | |
| 216 epub_document *doc = (epub_document*)doc_; | |
| 217 epub_chapter *ch; | |
| 218 int i; | |
| 219 | |
| 220 const char *s = strchr(dest, '#'); | |
| 221 size_t n = s ? (size_t)(s - dest) : strlen(dest); | |
| 222 if (s && s[1] == 0) | |
| 223 s = NULL; | |
| 224 | |
| 225 for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) | |
| 226 { | |
| 227 if (!strncmp(ch->path, dest, n) && ch->path[n] == 0) | |
| 228 { | |
| 229 if (s) | |
| 230 { | |
| 231 float y; | |
| 232 fz_html *html = epub_get_laid_out_html(ctx, doc, ch); | |
| 233 int ph = html->page_h; | |
| 234 | |
| 235 /* Search for a matching fragment */ | |
| 236 y = fz_find_html_target(ctx, html, s+1); | |
| 237 fz_drop_html(ctx, html); | |
| 238 if (y >= 0) | |
| 239 { | |
| 240 int page = y / ph; | |
| 241 return fz_make_link_dest_xyz(i, page, 0, y - page * ph, 0); | |
| 242 } | |
| 243 return fz_make_link_dest_none(); | |
| 244 } | |
| 245 return fz_make_link_dest_xyz(i, 0, 0, 0, 0); | |
| 246 } | |
| 247 } | |
| 248 | |
| 249 return fz_make_link_dest_none(); | |
| 250 } | |
| 251 | |
| 252 static void | |
| 253 epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em) | |
| 254 { | |
| 255 epub_document *doc = (epub_document*)doc_; | |
| 256 uint32_t css_sum = user_css_sum(ctx); | |
| 257 int use_doc_css = fz_use_document_css(ctx); | |
| 258 | |
| 259 if (doc->layout_w == w && doc->layout_h == h && doc->layout_em == em && doc->css_sum == css_sum) | |
| 260 return; | |
| 261 doc->layout_w = w; | |
| 262 doc->layout_h = h; | |
| 263 doc->layout_em = em; | |
| 264 | |
| 265 if (doc->accel == NULL) | |
| 266 return; | |
| 267 | |
| 268 /* When we load the saved accelerator, doc->accel | |
| 269 * can be populated with different values than doc. | |
| 270 * This is really useful as doc starts out with the | |
| 271 * values being 0. If we've got the right values | |
| 272 * already, then don't bin the data! */ | |
| 273 if (doc->accel->layout_w == w && | |
| 274 doc->accel->layout_h == h && | |
| 275 doc->accel->layout_em == em && | |
| 276 doc->accel->use_doc_css == use_doc_css && | |
| 277 doc->accel->css_sum == css_sum) | |
| 278 return; | |
| 279 | |
| 280 doc->accel->layout_w = w; | |
| 281 doc->accel->layout_h = h; | |
| 282 doc->accel->layout_em = em; | |
| 283 doc->accel->use_doc_css = use_doc_css; | |
| 284 doc->accel->css_sum = css_sum; | |
| 285 invalidate_accelerator(ctx, doc->accel); | |
| 286 } | |
| 287 | |
| 288 static int | |
| 289 epub_count_chapters(fz_context *ctx, fz_document *doc_) | |
| 290 { | |
| 291 epub_document *doc = (epub_document*)doc_; | |
| 292 epub_chapter *ch; | |
| 293 int count = 0; | |
| 294 for (ch = doc->spine; ch; ch = ch->next) | |
| 295 ++count; | |
| 296 return count; | |
| 297 } | |
| 298 | |
| 299 static int | |
| 300 epub_count_pages(fz_context *ctx, fz_document *doc_, int chapter) | |
| 301 { | |
| 302 epub_document *doc = (epub_document*)doc_; | |
| 303 epub_chapter *ch; | |
| 304 int i; | |
| 305 for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) | |
| 306 { | |
| 307 if (i == chapter) | |
| 308 { | |
| 309 return count_chapter_pages(ctx, doc, ch); | |
| 310 } | |
| 311 } | |
| 312 return 0; | |
| 313 } | |
| 314 | |
/* Header fields of a serialized accelerator, in the order they are
 * written: generic accelerator magic, EPUB-specific magic (the bytes
 * "ePub" when stored little-endian), and the format version. */
#define MAGIC_ACCELERATOR 0xacce1e7a
#define MAGIC_ACCEL_EPUB 0x62755065
#define ACCEL_VERSION 0x00010001
| 318 | |
| 319 static void epub_load_accelerator(fz_context *ctx, epub_document *doc, fz_stream *accel) | |
| 320 { | |
| 321 int v; | |
| 322 float w, h, em; | |
| 323 int num_chapters; | |
| 324 epub_accelerator *acc = NULL; | |
| 325 uint32_t css_sum; | |
| 326 int use_doc_css; | |
| 327 int make_new = (accel == NULL); | |
| 328 | |
| 329 fz_var(acc); | |
| 330 | |
| 331 if (accel) | |
| 332 { | |
| 333 /* Try to read the accelerator data. If we fail silently give up. */ | |
| 334 fz_try(ctx) | |
| 335 { | |
| 336 v = fz_read_int32_le(ctx, accel); | |
| 337 if (v != (int32_t)MAGIC_ACCELERATOR) | |
| 338 { | |
| 339 make_new = 1; | |
| 340 break; | |
| 341 } | |
| 342 | |
| 343 v = fz_read_int32_le(ctx, accel); | |
| 344 if (v != MAGIC_ACCEL_EPUB) | |
| 345 { | |
| 346 make_new = 1; | |
| 347 break; | |
| 348 } | |
| 349 | |
| 350 v = fz_read_int32_le(ctx, accel); | |
| 351 if (v != ACCEL_VERSION) | |
| 352 { | |
| 353 make_new = 1; | |
| 354 break; | |
| 355 } | |
| 356 | |
| 357 w = fz_read_float_le(ctx, accel); | |
| 358 h = fz_read_float_le(ctx, accel); | |
| 359 em = fz_read_float_le(ctx, accel); | |
| 360 css_sum = fz_read_uint32_le(ctx, accel); | |
| 361 use_doc_css = fz_read_int32_le(ctx, accel); | |
| 362 | |
| 363 num_chapters = fz_read_int32_le(ctx, accel); | |
| 364 if (num_chapters <= 0) | |
| 365 { | |
| 366 make_new = 1; | |
| 367 break; | |
| 368 } | |
| 369 | |
| 370 acc = fz_malloc_struct(ctx, epub_accelerator); | |
| 371 acc->pages_in_chapter = Memento_label(fz_malloc_array(ctx, num_chapters, int), "accel_pages_in_chapter"); | |
| 372 acc->max_chapters = acc->num_chapters = num_chapters; | |
| 373 acc->layout_w = w; | |
| 374 acc->layout_h = h; | |
| 375 acc->layout_em = em; | |
| 376 acc->css_sum = css_sum; | |
| 377 acc->use_doc_css = use_doc_css; | |
| 378 | |
| 379 for (v = 0; v < num_chapters; v++) | |
| 380 acc->pages_in_chapter[v] = fz_read_int32_le(ctx, accel); | |
| 381 } | |
| 382 fz_catch(ctx) | |
| 383 { | |
| 384 if (acc) | |
| 385 fz_free(ctx, acc->pages_in_chapter); | |
| 386 fz_free(ctx, acc); | |
| 387 /* Swallow the error and run unaccelerated */ | |
| 388 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 389 fz_report_error(ctx); | |
| 390 make_new = 1; | |
| 391 } | |
| 392 } | |
| 393 | |
| 394 /* If we aren't given an accelerator to load (or the one we're given | |
| 395 * is bad) create a blank stub and we can fill it out as we go. */ | |
| 396 if (make_new) | |
| 397 { | |
| 398 acc = fz_malloc_struct(ctx, epub_accelerator); | |
| 399 acc->css_sum = doc->css_sum; | |
| 400 acc->use_doc_css = fz_use_document_css(ctx); | |
| 401 } | |
| 402 | |
| 403 doc->accel = acc; | |
| 404 } | |
| 405 | |
| 406 static void | |
| 407 accelerate_chapter(fz_context *ctx, epub_document *doc, epub_chapter *ch, fz_html *html) | |
| 408 { | |
| 409 epub_accelerator *acc = doc->accel; | |
| 410 int p = count_laid_out_pages(html); | |
| 411 | |
| 412 if (ch->number < acc->num_chapters) | |
| 413 { | |
| 414 if (acc->pages_in_chapter[ch->number] != p && acc->pages_in_chapter[ch->number] != -1) | |
| 415 { | |
| 416 fz_warn(ctx, "Invalidating stale accelerator data."); | |
| 417 invalidate_accelerator(ctx, doc->accel); | |
| 418 } | |
| 419 acc->pages_in_chapter[ch->number] = p; | |
| 420 return; | |
| 421 } | |
| 422 | |
| 423 if (ch->number >= acc->max_chapters) | |
| 424 { | |
| 425 int n = acc->max_chapters; | |
| 426 int i; | |
| 427 if (n == 0) | |
| 428 n = 4; | |
| 429 while (n <= ch->number) | |
| 430 n *= 2; | |
| 431 | |
| 432 acc->pages_in_chapter = fz_realloc_array(ctx, acc->pages_in_chapter, n, int); | |
| 433 for (i = acc->max_chapters; i < n; i++) | |
| 434 acc->pages_in_chapter[i] = -1; | |
| 435 acc->max_chapters = n; | |
| 436 } | |
| 437 acc->pages_in_chapter[ch->number] = p; | |
| 438 if (acc->num_chapters < ch->number+1) | |
| 439 acc->num_chapters = ch->number+1; | |
| 440 } | |
| 441 | |
| 442 static void | |
| 443 epub_drop_page(fz_context *ctx, fz_page *page_) | |
| 444 { | |
| 445 epub_page *page = (epub_page *)page_; | |
| 446 fz_drop_html(ctx, page->html); | |
| 447 } | |
| 448 | |
| 449 static epub_chapter * | |
| 450 epub_load_chapter(fz_context *ctx, epub_document *doc, const char *path, int i) | |
| 451 { | |
| 452 epub_chapter *ch; | |
| 453 | |
| 454 ch = fz_malloc_struct(ctx, epub_chapter); | |
| 455 fz_try(ctx) | |
| 456 { | |
| 457 ch->path = Memento_label(fz_strdup(ctx, path), "chapter_path"); | |
| 458 ch->number = i; | |
| 459 } | |
| 460 fz_catch(ctx) | |
| 461 { | |
| 462 fz_free(ctx, ch); | |
| 463 fz_rethrow(ctx); | |
| 464 } | |
| 465 | |
| 466 return ch; | |
| 467 } | |
| 468 | |
| 469 static fz_html * | |
| 470 epub_parse_chapter(fz_context *ctx, epub_document *doc, epub_chapter *ch) | |
| 471 { | |
| 472 fz_archive *zip = doc->zip; | |
| 473 fz_buffer *buf; | |
| 474 char base_uri[2048]; | |
| 475 fz_html *html; | |
| 476 | |
| 477 /* Look for one we made earlier */ | |
| 478 html = fz_find_html(ctx, doc, ch->number); | |
| 479 if (html) | |
| 480 return html; | |
| 481 | |
| 482 fz_dirname(base_uri, ch->path, sizeof base_uri); | |
| 483 | |
| 484 buf = fz_read_archive_entry(ctx, zip, ch->path); | |
| 485 fz_try(ctx) | |
| 486 html = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx), 1, 1, 0); | |
| 487 fz_always(ctx) | |
| 488 fz_drop_buffer(ctx, buf); | |
| 489 fz_catch(ctx) | |
| 490 fz_rethrow(ctx); | |
| 491 | |
| 492 return fz_store_html(ctx, html, doc, ch->number); | |
| 493 } | |
| 494 | |
| 495 static fz_html * | |
| 496 epub_get_laid_out_html(fz_context *ctx, epub_document *doc, epub_chapter *ch) | |
| 497 { | |
| 498 fz_html *html = epub_parse_chapter(ctx, doc, ch); | |
| 499 fz_try(ctx) | |
| 500 { | |
| 501 fz_layout_html(ctx, html, doc->layout_w, doc->layout_h, doc->layout_em); | |
| 502 accelerate_chapter(ctx, doc, ch, html); | |
| 503 } | |
| 504 fz_catch(ctx) | |
| 505 { | |
| 506 fz_drop_html(ctx, html); | |
| 507 fz_rethrow(ctx); | |
| 508 } | |
| 509 | |
| 510 fz_drop_html(ctx, doc->most_recent_html); | |
| 511 doc->most_recent_html = fz_keep_html(ctx, html); | |
| 512 | |
| 513 return html; | |
| 514 } | |
| 515 | |
| 516 static fz_rect | |
| 517 epub_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box) | |
| 518 { | |
| 519 epub_document *doc = (epub_document*)page_->doc; | |
| 520 epub_page *page = (epub_page*)page_; | |
| 521 epub_chapter *ch = page->ch; | |
| 522 fz_rect bbox; | |
| 523 fz_html *html = epub_get_laid_out_html(ctx, doc, ch); | |
| 524 | |
| 525 bbox.x0 = 0; | |
| 526 bbox.y0 = 0; | |
| 527 bbox.x1 = html->page_w + html->page_margin[L] + html->page_margin[R]; | |
| 528 bbox.y1 = html->page_h + html->page_margin[T] + html->page_margin[B]; | |
| 529 fz_drop_html(ctx, html); | |
| 530 return bbox; | |
| 531 } | |
| 532 | |
| 533 static void | |
| 534 epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) | |
| 535 { | |
| 536 epub_page *page = (epub_page*)page_; | |
| 537 | |
| 538 fz_draw_html(ctx, dev, ctm, page->html, page->number); | |
| 539 } | |
| 540 | |
| 541 static fz_link * | |
| 542 epub_load_links(fz_context *ctx, fz_page *page_) | |
| 543 { | |
| 544 epub_page *page = (epub_page*)page_; | |
| 545 epub_chapter *ch = page->ch; | |
| 546 | |
| 547 return fz_load_html_links(ctx, page->html, page->number, ch->path); | |
| 548 } | |
| 549 | |
| 550 static fz_bookmark | |
| 551 epub_make_bookmark(fz_context *ctx, fz_document *doc_, fz_location loc) | |
| 552 { | |
| 553 epub_document *doc = (epub_document*)doc_; | |
| 554 epub_chapter *ch; | |
| 555 int i; | |
| 556 | |
| 557 for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) | |
| 558 { | |
| 559 if (i == loc.chapter) | |
| 560 { | |
| 561 fz_html *html = epub_get_laid_out_html(ctx, doc, ch); | |
| 562 fz_bookmark mark = fz_make_html_bookmark(ctx, html, loc.page); | |
| 563 fz_drop_html(ctx, html); | |
| 564 return mark; | |
| 565 } | |
| 566 } | |
| 567 | |
| 568 return 0; | |
| 569 } | |
| 570 | |
| 571 static fz_location | |
| 572 epub_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark) | |
| 573 { | |
| 574 epub_document *doc = (epub_document*)doc_; | |
| 575 epub_chapter *ch; | |
| 576 int i; | |
| 577 | |
| 578 for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) | |
| 579 { | |
| 580 fz_html *html = epub_get_laid_out_html(ctx, doc, ch); | |
| 581 int p = fz_lookup_html_bookmark(ctx, html, mark); | |
| 582 fz_drop_html(ctx, html); | |
| 583 if (p != -1) | |
| 584 return fz_make_location(i, p); | |
| 585 } | |
| 586 return fz_make_location(-1, -1); | |
| 587 } | |
| 588 | |
| 589 static fz_page * | |
| 590 epub_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number) | |
| 591 { | |
| 592 epub_document *doc = (epub_document*)doc_; | |
| 593 epub_chapter *ch; | |
| 594 int i; | |
| 595 | |
| 596 if (chapter < 0) | |
| 597 fz_throw(ctx, FZ_ERROR_ARGUMENT, "invalid chapter number: %d", chapter); | |
| 598 if (number < 0) | |
| 599 fz_throw(ctx, FZ_ERROR_ARGUMENT, "invalid page number: %d", number); | |
| 600 | |
| 601 for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) | |
| 602 { | |
| 603 if (i == chapter) | |
| 604 { | |
| 605 epub_page *page = fz_new_derived_page(ctx, epub_page, doc_); | |
| 606 page->super.bound_page = epub_bound_page; | |
| 607 page->super.run_page_contents = epub_run_page; | |
| 608 page->super.load_links = epub_load_links; | |
| 609 page->super.drop_page = epub_drop_page; | |
| 610 page->ch = ch; | |
| 611 page->number = number; | |
| 612 page->html = epub_get_laid_out_html(ctx, doc, ch); | |
| 613 return (fz_page*)page; | |
| 614 } | |
| 615 } | |
| 616 return NULL; | |
| 617 } | |
| 618 | |
| 619 static void | |
| 620 epub_page_label(fz_context *ctx, fz_document *doc_, int chapter, int number, char *buf, size_t size) | |
| 621 { | |
| 622 fz_snprintf(buf, size, "ch. %d, p. %d", chapter+1, number+1); | |
| 623 } | |
| 624 | |
| 625 static void | |
| 626 epub_drop_accelerator(fz_context *ctx, epub_accelerator *acc) | |
| 627 { | |
| 628 if (acc == NULL) | |
| 629 return; | |
| 630 | |
| 631 fz_free(ctx, acc->pages_in_chapter); | |
| 632 fz_free(ctx, acc); | |
| 633 } | |
| 634 | |
| 635 static void | |
| 636 epub_drop_document(fz_context *ctx, fz_document *doc_) | |
| 637 { | |
| 638 epub_document *doc = (epub_document*)doc_; | |
| 639 epub_chapter *ch, *next; | |
| 640 ch = doc->spine; | |
| 641 while (ch) | |
| 642 { | |
| 643 next = ch->next; | |
| 644 fz_free(ctx, ch->path); | |
| 645 fz_free(ctx, ch); | |
| 646 ch = next; | |
| 647 } | |
| 648 epub_drop_accelerator(ctx, doc->accel); | |
| 649 fz_drop_archive(ctx, doc->zip); | |
| 650 fz_drop_html_font_set(ctx, doc->set); | |
| 651 fz_drop_outline(ctx, doc->outline); | |
| 652 fz_free(ctx, doc->dc_title); | |
| 653 fz_free(ctx, doc->dc_creator); | |
| 654 fz_drop_html(ctx, doc->most_recent_html); | |
| 655 fz_purge_stored_html(ctx, doc); | |
| 656 } | |
| 657 | |
| 658 static const char * | |
| 659 rel_path_from_idref(fz_xml *manifest, const char *idref) | |
| 660 { | |
| 661 fz_xml *item; | |
| 662 if (!idref) | |
| 663 return NULL; | |
| 664 item = fz_xml_find_down(manifest, "item"); | |
| 665 while (item) | |
| 666 { | |
| 667 const char *id = fz_xml_att(item, "id"); | |
| 668 if (id && !strcmp(id, idref)) | |
| 669 return fz_xml_att(item, "href"); | |
| 670 item = fz_xml_find_next(item, "item"); | |
| 671 } | |
| 672 return NULL; | |
| 673 } | |
| 674 | |
| 675 static const char * | |
| 676 path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n) | |
| 677 { | |
| 678 const char *rel_path = rel_path_from_idref(manifest, idref); | |
| 679 if (!rel_path) | |
| 680 { | |
| 681 path[0] = 0; | |
| 682 return NULL; | |
| 683 } | |
| 684 fz_strlcpy(path, base_uri, n); | |
| 685 fz_strlcat(path, "/", n); | |
| 686 fz_strlcat(path, rel_path, n); | |
| 687 return fz_cleanname(fz_urldecode(path)); | |
| 688 } | |
| 689 | |
| 690 static fz_outline * | |
| 691 epub_parse_ncx_imp(fz_context *ctx, epub_document *doc, fz_xml *node, char *base_uri) | |
| 692 { | |
| 693 char path[2048]; | |
| 694 fz_outline *outline, *head, **tailp; | |
| 695 | |
| 696 head = NULL; | |
| 697 tailp = &head; | |
| 698 | |
| 699 node = fz_xml_find_down(node, "navPoint"); | |
| 700 while (node) | |
| 701 { | |
| 702 char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(fz_xml_find_down(node, "navLabel"), "text"))); | |
| 703 char *content = fz_xml_att(fz_xml_find_down(node, "content"), "src"); | |
| 704 if (text && content) | |
| 705 { | |
| 706 fz_strlcpy(path, base_uri, sizeof path); | |
| 707 fz_strlcat(path, "/", sizeof path); | |
| 708 fz_strlcat(path, content, sizeof path); | |
| 709 fz_urldecode(path); | |
| 710 fz_cleanname(path); | |
| 711 | |
| 712 fz_try(ctx) | |
| 713 { | |
| 714 *tailp = outline = fz_new_outline(ctx); | |
| 715 tailp = &(*tailp)->next; | |
| 716 outline->title = Memento_label(fz_strdup(ctx, text), "outline_title"); | |
| 717 outline->uri = Memento_label(fz_strdup(ctx, path), "outline_uri"); | |
| 718 outline->page = fz_make_location(-1, -1); | |
| 719 outline->down = epub_parse_ncx_imp(ctx, doc, node, base_uri); | |
| 720 outline->is_open = 1; | |
| 721 } | |
| 722 fz_catch(ctx) | |
| 723 { | |
| 724 fz_drop_outline(ctx, head); | |
| 725 fz_rethrow(ctx); | |
| 726 } | |
| 727 } | |
| 728 node = fz_xml_find_next(node, "navPoint"); | |
| 729 } | |
| 730 | |
| 731 return head; | |
| 732 } | |
| 733 | |
| 734 static void | |
| 735 epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path) | |
| 736 { | |
| 737 fz_archive *zip = doc->zip; | |
| 738 fz_buffer *buf = NULL; | |
| 739 fz_xml_doc *ncx = NULL; | |
| 740 char base_uri[2048]; | |
| 741 | |
| 742 fz_var(buf); | |
| 743 fz_var(ncx); | |
| 744 | |
| 745 fz_try(ctx) | |
| 746 { | |
| 747 fz_dirname(base_uri, path, sizeof base_uri); | |
| 748 buf = fz_read_archive_entry(ctx, zip, path); | |
| 749 ncx = fz_parse_xml(ctx, buf, 0); | |
| 750 doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(fz_xml_root(ncx), "navMap"), base_uri); | |
| 751 } | |
| 752 fz_always(ctx) | |
| 753 { | |
| 754 fz_drop_buffer(ctx, buf); | |
| 755 fz_drop_xml(ctx, ncx); | |
| 756 } | |
| 757 fz_catch(ctx) | |
| 758 fz_rethrow(ctx); | |
| 759 } | |
| 760 | |
| 761 static char * | |
| 762 find_metadata(fz_context *ctx, fz_xml *metadata, char *key) | |
| 763 { | |
| 764 char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(metadata, key))); | |
| 765 if (text) | |
| 766 return fz_strdup(ctx, text); | |
| 767 return NULL; | |
| 768 } | |
| 769 | |
| 770 static fz_buffer * | |
| 771 read_container_and_prefix(fz_context *ctx, fz_archive *zip, char *prefix, size_t prefix_len) | |
| 772 { | |
| 773 int n = fz_count_archive_entries(ctx, zip); | |
| 774 int i; | |
| 775 | |
| 776 prefix[0] = 0; | |
| 777 | |
| 778 /* First off, look for the container.xml at the top level. */ | |
| 779 for (i = 0; i < n; i++) | |
| 780 { | |
| 781 const char *p = fz_list_archive_entry(ctx, zip, i); | |
| 782 | |
| 783 if (!strcmp(p, "META-INF/container.xml")) | |
| 784 return fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); | |
| 785 } | |
| 786 | |
| 787 /* If that failed, look for the first such file in a subdirectory. */ | |
| 788 for (i = 0; i < n; i++) | |
| 789 { | |
| 790 const char *p = fz_list_archive_entry(ctx, zip, i); | |
| 791 size_t z = strlen(p); | |
| 792 size_t z0 = sizeof("META-INF/container.xml")-1; | |
| 793 | |
| 794 if (z < z0) | |
| 795 continue; | |
| 796 if (!strcmp(p + z - z0, "META-INF/container.xml")) | |
| 797 { | |
| 798 if (z - z0 >= prefix_len) | |
| 799 { | |
| 800 fz_warn(ctx, "Ignoring %s as path too long.", p); | |
| 801 continue; | |
| 802 } | |
| 803 memcpy(prefix, p, z-z0); | |
| 804 prefix[z-z0] = 0; | |
| 805 return fz_read_archive_entry(ctx, zip, p); | |
| 806 } | |
| 807 } | |
| 808 | |
| 809 return fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); | |
| 810 } | |
| 811 | |
| 812 static void | |
| 813 epub_parse_header(fz_context *ctx, epub_document *doc) | |
| 814 { | |
| 815 fz_archive *zip = doc->zip; | |
| 816 fz_buffer *buf = NULL; | |
| 817 fz_xml_doc *encryption_xml = NULL; | |
| 818 fz_xml_doc *container_xml = NULL; | |
| 819 fz_xml_doc *content_opf = NULL; | |
| 820 fz_xml *container, *rootfiles, *rootfile; | |
| 821 fz_xml *package, *manifest, *spine, *itemref, *metadata; | |
| 822 char base_uri[2048]; | |
| 823 const char *full_path; | |
| 824 const char *version; | |
| 825 char ncx[2048], s[2048]; | |
| 826 char *prefixed_full_path = NULL; | |
| 827 size_t prefix_len; | |
| 828 epub_chapter **tailp; | |
| 829 int i; | |
| 830 | |
| 831 fz_var(buf); | |
| 832 fz_var(encryption_xml); | |
| 833 fz_var(container_xml); | |
| 834 fz_var(content_opf); | |
| 835 fz_var(prefixed_full_path); | |
| 836 | |
| 837 fz_try(ctx) | |
| 838 { | |
| 839 /* parse META-INF/encryption.xml to figure out which entries are encrypted */ | |
| 840 | |
| 841 /* parse META-INF/container.xml to find OPF */ | |
| 842 /* Reuse base_uri to read the prefix. */ | |
| 843 buf = read_container_and_prefix(ctx, zip, base_uri, sizeof(base_uri)); | |
| 844 container_xml = fz_parse_xml(ctx, buf, 0); | |
| 845 fz_drop_buffer(ctx, buf); | |
| 846 buf = NULL; | |
| 847 | |
| 848 /* Some epub files can be prefixed by a directory name. This (normally | |
| 849 * empty!) will be in base_uri. */ | |
| 850 prefix_len = strlen(base_uri); | |
| 851 { | |
| 852 /* Further abuse base_uri to hold a temporary name. */ | |
| 853 const size_t z0 = sizeof("META-INF/encryption.xml")-1; | |
| 854 if (sizeof(base_uri) <= prefix_len + z0) | |
| 855 fz_throw(ctx, FZ_ERROR_FORMAT, "Prefix too long in epub"); | |
| 856 strcpy(base_uri + prefix_len, "META-INF/encryption.xml"); | |
| 857 if (fz_has_archive_entry(ctx, zip, base_uri)) | |
| 858 { | |
| 859 fz_warn(ctx, "EPUB may be locked by DRM"); | |
| 860 | |
| 861 buf = fz_read_archive_entry(ctx, zip, base_uri); | |
| 862 encryption_xml = fz_parse_xml(ctx, buf, 0); | |
| 863 fz_drop_buffer(ctx, buf); | |
| 864 buf = NULL; | |
| 865 | |
| 866 epub_parse_encryption(ctx, doc, fz_xml_find(fz_xml_root(encryption_xml), "encryption")); | |
| 867 zip = doc->zip; | |
| 868 } | |
| 869 } | |
| 870 | |
| 871 container = fz_xml_find(fz_xml_root(container_xml), "container"); | |
| 872 rootfiles = fz_xml_find_down(container, "rootfiles"); | |
| 873 rootfile = fz_xml_find_down(rootfiles, "rootfile"); | |
| 874 full_path = fz_xml_att(rootfile, "full-path"); | |
| 875 if (!full_path) | |
| 876 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find root file in EPUB"); | |
| 877 | |
| 878 fz_dirname(base_uri+prefix_len, full_path, sizeof(base_uri) - prefix_len); | |
| 879 | |
| 880 prefixed_full_path = fz_malloc(ctx, strlen(full_path) + prefix_len + 1); | |
| 881 memcpy(prefixed_full_path, base_uri, prefix_len); | |
| 882 strcpy(prefixed_full_path + prefix_len, full_path); | |
| 883 | |
| 884 /* parse OPF to find NCX and spine */ | |
| 885 | |
| 886 buf = fz_read_archive_entry(ctx, zip, prefixed_full_path); | |
| 887 content_opf = fz_parse_xml(ctx, buf, 0); | |
| 888 fz_drop_buffer(ctx, buf); | |
| 889 buf = NULL; | |
| 890 | |
| 891 package = fz_xml_find(fz_xml_root(content_opf), "package"); | |
| 892 version = fz_xml_att(package, "version"); | |
| 893 if (!version || strcmp(version, "2.0")) | |
| 894 fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>"); | |
| 895 | |
| 896 metadata = fz_xml_find_down(package, "metadata"); | |
| 897 if (metadata) | |
| 898 { | |
| 899 doc->dc_title = Memento_label(find_metadata(ctx, metadata, "title"), "epub_title"); | |
| 900 doc->dc_creator = Memento_label(find_metadata(ctx, metadata, "creator"), "epub_creator"); | |
| 901 } | |
| 902 | |
| 903 manifest = fz_xml_find_down(package, "manifest"); | |
| 904 spine = fz_xml_find_down(package, "spine"); | |
| 905 | |
| 906 if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) | |
| 907 { | |
| 908 epub_parse_ncx(ctx, doc, ncx); | |
| 909 } | |
| 910 | |
| 911 doc->spine = NULL; | |
| 912 tailp = &doc->spine; | |
| 913 itemref = fz_xml_find_down(spine, "itemref"); | |
| 914 i = 0; | |
| 915 while (itemref) | |
| 916 { | |
| 917 if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) | |
| 918 { | |
| 919 fz_try(ctx) | |
| 920 { | |
| 921 *tailp = epub_load_chapter(ctx, doc, s, i); | |
| 922 tailp = &(*tailp)->next; | |
| 923 i++; | |
| 924 } | |
| 925 fz_catch(ctx) | |
| 926 { | |
| 927 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 928 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 929 fz_report_error(ctx); | |
| 930 fz_warn(ctx, "ignoring chapter %s", s); | |
| 931 } | |
| 932 } | |
| 933 itemref = fz_xml_find_next(itemref, "itemref"); | |
| 934 } | |
| 935 } | |
| 936 fz_always(ctx) | |
| 937 { | |
| 938 fz_drop_xml(ctx, content_opf); | |
| 939 fz_drop_xml(ctx, container_xml); | |
| 940 fz_drop_xml(ctx, encryption_xml); | |
| 941 fz_drop_buffer(ctx, buf); | |
| 942 fz_free(ctx, prefixed_full_path); | |
| 943 } | |
| 944 fz_catch(ctx) | |
| 945 fz_rethrow(ctx); | |
| 946 } | |
| 947 | |
| 948 static fz_outline * | |
| 949 epub_load_outline(fz_context *ctx, fz_document *doc_) | |
| 950 { | |
| 951 epub_document *doc = (epub_document*)doc_; | |
| 952 return fz_keep_outline(ctx, doc->outline); | |
| 953 } | |
| 954 | |
| 955 static int | |
| 956 epub_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size) | |
| 957 { | |
| 958 epub_document *doc = (epub_document*)doc_; | |
| 959 if (!strcmp(key, FZ_META_FORMAT)) | |
| 960 return 1 + (int)fz_strlcpy(buf, "EPUB", size); | |
| 961 if (!strcmp(key, FZ_META_INFO_TITLE) && doc->dc_title) | |
| 962 return 1 + (int)fz_strlcpy(buf, doc->dc_title, size); | |
| 963 if (!strcmp(key, FZ_META_INFO_AUTHOR) && doc->dc_creator) | |
| 964 return 1 + (int)fz_strlcpy(buf, doc->dc_creator, size); | |
| 965 return -1; | |
| 966 } | |
| 967 | |
| 968 static void | |
| 969 epub_output_accelerator(fz_context *ctx, fz_document *doc_, fz_output *out) | |
| 970 { | |
| 971 epub_document *doc = (epub_document*)doc_; | |
| 972 int i; | |
| 973 | |
| 974 fz_try(ctx) | |
| 975 { | |
| 976 if (doc->accel == NULL) | |
| 977 fz_throw(ctx, FZ_ERROR_ARGUMENT, "No accelerator data to write"); | |
| 978 | |
| 979 fz_write_int32_le(ctx, out, MAGIC_ACCELERATOR); | |
| 980 fz_write_int32_le(ctx, out, MAGIC_ACCEL_EPUB); | |
| 981 fz_write_int32_le(ctx, out, ACCEL_VERSION); | |
| 982 fz_write_float_le(ctx, out, doc->accel->layout_w); | |
| 983 fz_write_float_le(ctx, out, doc->accel->layout_h); | |
| 984 fz_write_float_le(ctx, out, doc->accel->layout_em); | |
| 985 fz_write_uint32_le(ctx, out, doc->accel->css_sum); | |
| 986 fz_write_int32_le(ctx, out, doc->accel->use_doc_css); | |
| 987 fz_write_int32_le(ctx, out, doc->accel->num_chapters); | |
| 988 for (i = 0; i < doc->accel->num_chapters; i++) | |
| 989 fz_write_int32_le(ctx, out, doc->accel->pages_in_chapter[i]); | |
| 990 | |
| 991 fz_close_output(ctx, out); | |
| 992 } | |
| 993 fz_always(ctx) | |
| 994 fz_drop_output(ctx, out); | |
| 995 fz_catch(ctx) | |
| 996 fz_rethrow(ctx); | |
| 997 } | |
| 998 | |
| 999 /* Takes ownership of zip. Will always eventually drop it. | |
| 1000 * Never takes ownership of accel. */ | |
| 1001 static fz_document * | |
| 1002 epub_init(fz_context *ctx, fz_archive *zip, fz_stream *accel) | |
| 1003 { | |
| 1004 epub_document *doc = NULL; | |
| 1005 | |
| 1006 fz_var(doc); | |
| 1007 fz_var(zip); | |
| 1008 | |
| 1009 fz_try(ctx) | |
| 1010 { | |
| 1011 doc = fz_new_derived_document(ctx, epub_document); | |
| 1012 doc->zip = zip; | |
| 1013 zip = NULL; | |
| 1014 | |
| 1015 doc->super.drop_document = epub_drop_document; | |
| 1016 doc->super.layout = epub_layout; | |
| 1017 doc->super.load_outline = epub_load_outline; | |
| 1018 doc->super.resolve_link_dest = epub_resolve_link; | |
| 1019 doc->super.make_bookmark = epub_make_bookmark; | |
| 1020 doc->super.lookup_bookmark = epub_lookup_bookmark; | |
| 1021 doc->super.count_chapters = epub_count_chapters; | |
| 1022 doc->super.count_pages = epub_count_pages; | |
| 1023 doc->super.load_page = epub_load_page; | |
| 1024 doc->super.page_label = epub_page_label; | |
| 1025 doc->super.lookup_metadata = epub_lookup_metadata; | |
| 1026 doc->super.output_accelerator = epub_output_accelerator; | |
| 1027 doc->super.is_reflowable = 1; | |
| 1028 | |
| 1029 doc->set = fz_new_html_font_set(ctx); | |
| 1030 doc->css_sum = user_css_sum(ctx); | |
| 1031 epub_load_accelerator(ctx, doc, accel); | |
| 1032 epub_parse_header(ctx, doc); | |
| 1033 } | |
| 1034 fz_catch(ctx) | |
| 1035 { | |
| 1036 fz_drop_archive(ctx, zip); | |
| 1037 fz_drop_document(ctx, &doc->super); | |
| 1038 fz_rethrow(ctx); | |
| 1039 } | |
| 1040 | |
| 1041 return (fz_document*)doc; | |
| 1042 } | |
| 1043 | |
| 1044 static fz_document * | |
| 1045 epub_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state) | |
| 1046 { | |
| 1047 fz_stream *file2 = NULL; | |
| 1048 fz_document *doc; | |
| 1049 fz_archive *zip = NULL; | |
| 1050 | |
| 1051 if (file == NULL) | |
| 1052 { | |
| 1053 /* Directory case: file == NULL and dir == the directory. */ | |
| 1054 if (fz_has_archive_entry(ctx, dir, "META-INF/container.xml")) | |
| 1055 file2 = file = fz_open_archive_entry(ctx, dir, "META-INF/container.xml"); | |
| 1056 else | |
| 1057 file2 = file = fz_open_archive_entry(ctx, dir, "META-INF\\container.xml"); | |
| 1058 if (file == NULL) | |
| 1059 fz_throw(ctx, FZ_ERROR_FORMAT, "Not an epub file"); | |
| 1060 zip = fz_keep_archive(ctx, dir); | |
| 1061 } | |
| 1062 else | |
| 1063 { | |
| 1064 /* File case: file != NULL and dir can be ignored. */ | |
| 1065 zip = fz_open_archive_with_stream(ctx, file); | |
| 1066 } | |
| 1067 | |
| 1068 | |
| 1069 fz_try(ctx) | |
| 1070 doc = epub_init(ctx, zip, file); | |
| 1071 fz_always(ctx) | |
| 1072 fz_drop_stream(ctx, file2); | |
| 1073 fz_catch(ctx) | |
| 1074 fz_rethrow(ctx); | |
| 1075 | |
| 1076 return doc; | |
| 1077 } | |
| 1078 | |
| 1079 static int | |
| 1080 epub_recognize(fz_context *doc, const fz_document_handler *handler, const char *magic) | |
| 1081 { | |
| 1082 if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml")) | |
| 1083 return 200; | |
| 1084 return 0; | |
| 1085 } | |
| 1086 | |
| 1087 static int | |
| 1088 epub_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state) | |
| 1089 { | |
| 1090 fz_archive *arch = NULL; | |
| 1091 int ret = 0; | |
| 1092 | |
| 1093 fz_var(arch); | |
| 1094 fz_var(ret); | |
| 1095 | |
| 1096 if (state) | |
| 1097 *state = NULL; | |
| 1098 if (free_state) | |
| 1099 *free_state = NULL; | |
| 1100 | |
| 1101 fz_try(ctx) | |
| 1102 { | |
| 1103 if (stream == NULL) | |
| 1104 arch = fz_keep_archive(ctx, dir); | |
| 1105 else | |
| 1106 { | |
| 1107 arch = fz_try_open_archive_with_stream(ctx, stream); | |
| 1108 if (arch == NULL) | |
| 1109 break; | |
| 1110 } | |
| 1111 | |
| 1112 if (fz_has_archive_entry(ctx, arch, "META-INF/container.xml") || | |
| 1113 fz_has_archive_entry(ctx, arch, "META-INF\\container.xml")) | |
| 1114 ret = 74; /* One less than the 75 that HWPX files are detected as. */ | |
| 1115 } | |
| 1116 fz_always(ctx) | |
| 1117 fz_drop_archive(ctx, arch); | |
| 1118 fz_catch(ctx) | |
| 1119 fz_rethrow(ctx); | |
| 1120 | |
| 1121 return ret; | |
| 1122 } | |
| 1123 | |
/* NULL-terminated list of file extensions listed in the EPUB handler. */
static const char *epub_extensions[] = { "epub", NULL };
| 1129 | |
/* NULL-terminated list of MIME types listed in the EPUB handler. */
static const char *epub_mimetypes[] = { "application/epub+zip", NULL };
| 1135 | |
| 1136 fz_document_handler epub_document_handler = | |
| 1137 { | |
| 1138 epub_recognize, | |
| 1139 epub_open_document, | |
| 1140 epub_extensions, | |
| 1141 epub_mimetypes, | |
| 1142 epub_recognize_content | |
| 1143 }; |
