comparison mupdf-source/source/fitz/output-docx.c @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #if FZ_ENABLE_DOCX_OUTPUT
26
27 #include "glyphbox.h"
28 #include "extract/extract.h"
29 #include "extract/buffer.h"
30
31 #include <assert.h>
32 #include <errno.h>
33 #include <string.h>
34
35
/*
 * Document writer state shared by every page of one output document.
 */
typedef struct
{
	fz_document_writer super;
	extract_alloc_t *alloc; /* Created by extract_alloc_create() with s_realloc_fn(). */

	/*
	 * .ctx is needed for the callbacks we get from the Extract library, for
	 * example s_realloc_fn(). Each of our main device callbacks sets .ctx on
	 * entry, and resets back to NULL before returning.
	 */
	fz_context *ctx;

	fz_output *output; /* Destination that buffer_write() writes the generated data to. */
	extract_t *extract; /* Extract instance that receives all page content. */
	int spacing; /* Passed to extract_process() by writer_end_page(). */
	int rotation; /* Passed to extract_process() by writer_end_page(). */
	int images; /* Passed to extract_process() by writer_end_page(). */
	int mediabox_clip; /* If non-zero, dev_text() skips glyphs entirely outside .mediabox. */
	fz_rect mediabox; /* As passed to writer_begin_page(). */
	char output_cache[1024]; /* Scratch buffer handed to Extract by buffer_cache(). */
} fz_docx_writer;
57
58
/*
 * Per-page device created by writer_begin_page(). It only adds a back
 * pointer to the owning writer, which holds the Extract state.
 */
typedef struct
{
	fz_device super;
	fz_docx_writer *writer; /* Set by writer_begin_page(). */
} fz_docx_device;
64
65
66 static void dev_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
67 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
68 {
69 fz_docx_device *dev = (fz_docx_device*) dev_;
70 fz_text_span *span;
71 assert(!dev->writer->ctx);
72 dev->writer->ctx = ctx;
73 fz_try(ctx)
74 {
75 for (span = text->head; span; span = span->next)
76 {
77 int i;
78 fz_matrix combined, trm;
79 fz_rect bbox;
80
81 combined = fz_concat(span->trm, ctm);
82
83 bbox = span->font->bbox;
84 if (extract_span_begin(
85 dev->writer->extract,
86 span->font->name,
87 span->font->flags.is_bold,
88 span->font->flags.is_italic,
89 span->wmode,
90 combined.a,
91 combined.b,
92 combined.c,
93 combined.d,
94 bbox.x0,
95 bbox.y0,
96 bbox.x1,
97 bbox.y1))
98 {
99 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin span");
100 }
101
102 trm = span->trm;
103 for (i=0; i<span->len; ++i)
104 {
105 fz_text_item *item = &span->items[i];
106 float adv = 0;
107 fz_rect bounds;
108
109 trm.e = item->x;
110 trm.f = item->y;
111 combined = fz_concat(trm, ctm);
112
113 if (dev->writer->mediabox_clip)
114 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, item, &dev->writer->mediabox))
115 continue;
116
117 if (span->items[i].gid >= 0)
118 adv = span->items[i].adv;
119
120 bounds = fz_bound_glyph(ctx, span->font, span->items[i].gid, combined);
121 if (extract_add_char(dev->writer->extract, combined.e, combined.f, item->ucs, adv,
122 bounds.x0, bounds.y0, bounds.x1, bounds.y1))
123 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to add char");
124 }
125
126 if (extract_span_end(dev->writer->extract))
127 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end span");
128 }
129 }
130 fz_always(ctx)
131 {
132 dev->writer->ctx = NULL;
133 }
134 fz_catch(ctx)
135 {
136 fz_rethrow(ctx);
137 }
138 }
139
140 static void dev_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
141 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
142 {
143 dev_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
144 }
145
146 static void dev_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
147 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
148 {
149 dev_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
150 }
151
152 static void dev_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
153 {
154 dev_text(ctx, dev_, text, ctm, NULL, NULL, 0 /*alpha*/, fz_default_color_params);
155 }
156
157 static void dev_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
158 {
159 dev_text(ctx, dev_, text, ctm, NULL, 0, 0, fz_default_color_params);
160 }
161
162 static void
163 dev_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm)
164 {
165 }
166
167 static void writer_image_free(void *handle, void *image_data)
168 {
169 fz_docx_writer *writer = handle;
170 fz_free(writer->ctx, image_data);
171 }
172
173 static void dev_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
174 {
175 fz_docx_device *dev = (fz_docx_device*) dev_;
176 const char *type = NULL;
177 fz_compressed_buffer *compressed = fz_compressed_image_buffer(ctx, img);
178
179 assert(!dev->writer->ctx);
180 dev->writer->ctx = ctx;
181 fz_try(ctx)
182 {
183 if (compressed)
184 {
185 if (0) { /* For alignment */ }
186 else if (compressed->params.type == FZ_IMAGE_RAW) type = "raw";
187 else if (compressed->params.type == FZ_IMAGE_FAX) type = "fax";
188 else if (compressed->params.type == FZ_IMAGE_FLATE) type = "flate";
189 else if (compressed->params.type == FZ_IMAGE_LZW) type = "lzw";
190 else if (compressed->params.type == FZ_IMAGE_BROTLI) type = "brotli";
191 else if (compressed->params.type == FZ_IMAGE_BMP) type = "bmp";
192 else if (compressed->params.type == FZ_IMAGE_GIF) type = "gif";
193 else if (compressed->params.type == FZ_IMAGE_JBIG2) type = "jbig2";
194 else if (compressed->params.type == FZ_IMAGE_JPEG) type = "jpeg";
195 else if (compressed->params.type == FZ_IMAGE_JPX) type = "jpx";
196 else if (compressed->params.type == FZ_IMAGE_JXR) type = "jxr";
197 else if (compressed->params.type == FZ_IMAGE_PNG) type = "png";
198 else if (compressed->params.type == FZ_IMAGE_PNM) type = "pnm";
199 else if (compressed->params.type == FZ_IMAGE_TIFF) type = "tiff";
200
201 if (type)
202 {
203 /* Write out raw data. */
204 unsigned char *data;
205 size_t datasize = fz_buffer_extract(ctx, compressed->buffer, &data);
206 if (extract_add_image(
207 dev->writer->extract,
208 type,
209 ctm.e /*x*/,
210 ctm.f /*y*/,
211 img->w /*w*/,
212 img->h /*h*/,
213 data,
214 datasize,
215 writer_image_free,
216 dev->writer
217 ))
218 {
219 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to add image type=%s", type);
220 }
221 }
222 else
223 {
224 /* We don't recognise this image type, so ignore. */
225 }
226 }
227 else
228 {
229 /*
230 * Compressed data not available, so we could write out
231 * raw pixel values. But for now we ignore.
232 */
233 }
234 }
235 fz_always(ctx)
236 {
237 dev->writer->ctx = NULL;
238 }
239 fz_catch(ctx)
240 {
241 fz_rethrow(ctx);
242 }
243 }
244
245 /*
246 * Support for sending information to Extract when walking stroke/fill path
247 * with fz_walk_path().
248 */
/*
 * Pairs a path walker with the Extract instance it feeds.
 * NOTE(review): this type does not appear to be referenced in the visible
 * part of this file; s_walk_path() uses a plain fz_path_walker and passes
 * the extract_t pointer directly as the callback argument.
 */
typedef struct
{
	fz_path_walker walker;
	extract_t *extract;
} walker_info_t;
254
255 static void s_moveto(fz_context *ctx, void *arg, float x, float y)
256 {
257 extract_t* extract = arg;
258 if (extract_moveto(extract, x, y))
259 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_moveto() failed");
260 }
261
262 static void s_lineto(fz_context *ctx, void *arg, float x, float y)
263 {
264 extract_t* extract = arg;
265 if (extract_lineto(extract, x, y))
266 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_lineto() failed");
267 }
268
269 static void s_curveto(fz_context *ctx, void *arg, float x1, float y1,
270 float x2, float y2, float x3, float y3)
271 {
272 /* We simply move to the end point of the curve so that subsequent
273 (straight) lines will be handled correctly. */
274 extract_t* extract = arg;
275 if (extract_moveto(extract, x3, y3))
276 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_moveto() failed");
277 }
278
279 static void s_closepath(fz_context *ctx, void *arg)
280 {
281 extract_t* extract = arg;
282 if (extract_closepath(extract))
283 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_closepath() failed");
284 }
285
286 /*
287 * Calls extract_*() path functions on <path> using fz_walk_path() and the
288 * above callbacks.
289 */
290 static void s_walk_path(fz_context *ctx, fz_docx_device *dev, extract_t *extract, const fz_path *path)
291 {
292 fz_path_walker walker;
293 walker.moveto = s_moveto;
294 walker.lineto = s_lineto;
295 walker.curveto = s_curveto;
296 walker.closepath = s_closepath;
297 walker.quadto = NULL;
298 walker.curvetov = NULL;
299 walker.curvetoy = NULL;
300 walker.rectto = NULL;
301
302 assert(dev->writer->ctx == ctx);
303 fz_walk_path(ctx, path, &walker, extract /*arg*/);
304 }
305
306 void dev_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd,
307 fz_matrix matrix, fz_colorspace * colorspace, const float *color, float alpha,
308 fz_color_params color_params)
309 {
310 fz_docx_device *dev = (fz_docx_device*) dev_;
311 extract_t *extract = dev->writer->extract;
312
313 assert(!dev->writer->ctx);
314 dev->writer->ctx = ctx;
315
316 fz_try(ctx)
317 {
318 if (extract_fill_begin(
319 extract,
320 matrix.a,
321 matrix.b,
322 matrix.c,
323 matrix.d,
324 matrix.e,
325 matrix.f,
326 color[0]
327 ))
328 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin fill");
329 s_walk_path(ctx, dev, extract, path);
330 if (extract_fill_end(extract))
331 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_fill_end() failed");
332 }
333 fz_always(ctx)
334 {
335 dev->writer->ctx = NULL;
336 }
337 fz_catch(ctx)
338 {
339 fz_rethrow(ctx);
340 }
341 }
342
343
344 static void
345 dev_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
346 const fz_stroke_state *stroke, fz_matrix in_ctm,
347 fz_colorspace *colorspace_in, const float *color, float alpha,
348 fz_color_params color_params)
349 {
350 fz_docx_device *dev = (fz_docx_device*) dev_;
351 extract_t *extract = dev->writer->extract;
352
353 assert(!dev->writer->ctx);
354 dev->writer->ctx = ctx;
355 fz_try(ctx)
356 {
357 if (extract_stroke_begin(
358 extract,
359 in_ctm.a,
360 in_ctm.b,
361 in_ctm.c,
362 in_ctm.d,
363 in_ctm.e,
364 in_ctm.f,
365 stroke->linewidth,
366 color[0]
367 ))
368 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin stroke");
369 s_walk_path(ctx, dev, extract, path);
370 if (extract_stroke_end(extract))
371 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_stroke_end() failed");
372 }
373 fz_always(ctx)
374 {
375 dev->writer->ctx = NULL;
376 }
377 fz_catch(ctx)
378 {
379 fz_rethrow(ctx);
380 }
381 }
382
383 static extract_struct_t
384 fz_struct_to_extract(fz_structure type)
385 {
386 switch (type)
387 {
388 default:
389 return extract_struct_INVALID;
390
391 case FZ_STRUCTURE_DOCUMENT:
392 return extract_struct_DOCUMENT;
393 case FZ_STRUCTURE_PART:
394 return extract_struct_PART;
395 case FZ_STRUCTURE_ART:
396 return extract_struct_ART;
397 case FZ_STRUCTURE_SECT:
398 return extract_struct_SECT;
399 case FZ_STRUCTURE_DIV:
400 return extract_struct_DIV;
401 case FZ_STRUCTURE_BLOCKQUOTE:
402 return extract_struct_BLOCKQUOTE;
403 case FZ_STRUCTURE_CAPTION:
404 return extract_struct_CAPTION;
405 case FZ_STRUCTURE_TOC:
406 return extract_struct_TOC;
407 case FZ_STRUCTURE_TOCI:
408 return extract_struct_TOCI;
409 case FZ_STRUCTURE_INDEX:
410 return extract_struct_INDEX;
411 case FZ_STRUCTURE_NONSTRUCT:
412 return extract_struct_NONSTRUCT;
413 case FZ_STRUCTURE_PRIVATE:
414 return extract_struct_PRIVATE;
415 /* Grouping elements (PDF 2.0 - Table 364) */
416 case FZ_STRUCTURE_DOCUMENTFRAGMENT:
417 return extract_struct_DOCUMENTFRAGMENT;
418 /* Grouping elements (PDF 2.0 - Table 365) */
419 case FZ_STRUCTURE_ASIDE:
420 return extract_struct_ASIDE;
421 /* Grouping elements (PDF 2.0 - Table 366) */
422 case FZ_STRUCTURE_TITLE:
423 return extract_struct_TITLE;
424 case FZ_STRUCTURE_FENOTE:
425 return extract_struct_FENOTE;
426 /* Grouping elements (PDF 2.0 - Table 367) */
427 case FZ_STRUCTURE_SUB:
428 return extract_struct_SUB;
429
430 /* Paragraphlike elements (PDF 1.7 - Table 10.21) */
431 case FZ_STRUCTURE_P:
432 return extract_struct_P;
433 case FZ_STRUCTURE_H:
434 return extract_struct_H;
435 case FZ_STRUCTURE_H1:
436 return extract_struct_H1;
437 case FZ_STRUCTURE_H2:
438 return extract_struct_H2;
439 case FZ_STRUCTURE_H3:
440 return extract_struct_H3;
441 case FZ_STRUCTURE_H4:
442 return extract_struct_H4;
443 case FZ_STRUCTURE_H5:
444 return extract_struct_H5;
445 case FZ_STRUCTURE_H6:
446 return extract_struct_H6;
447
448 /* List elements (PDF 1.7 - Table 10.23) */
449 case FZ_STRUCTURE_LIST:
450 return extract_struct_LIST;
451 case FZ_STRUCTURE_LISTITEM:
452 return extract_struct_LISTITEM;
453 case FZ_STRUCTURE_LABEL:
454 return extract_struct_LABEL;
455 case FZ_STRUCTURE_LISTBODY:
456 return extract_struct_LISTBODY;
457
458 /* Table elements (PDF 1.7 - Table 10.24) */
459 case FZ_STRUCTURE_TABLE:
460 return extract_struct_TABLE;
461 case FZ_STRUCTURE_TR:
462 return extract_struct_TR;
463 case FZ_STRUCTURE_TH:
464 return extract_struct_TH;
465 case FZ_STRUCTURE_TD:
466 return extract_struct_TD;
467 case FZ_STRUCTURE_THEAD:
468 return extract_struct_THEAD;
469 case FZ_STRUCTURE_TBODY:
470 return extract_struct_TBODY;
471 case FZ_STRUCTURE_TFOOT:
472 return extract_struct_TFOOT;
473
474 /* Inline elements (PDF 1.7 - Table 10.25) */
475 case FZ_STRUCTURE_SPAN:
476 return extract_struct_SPAN;
477 case FZ_STRUCTURE_QUOTE:
478 return extract_struct_QUOTE;
479 case FZ_STRUCTURE_NOTE:
480 return extract_struct_NOTE;
481 case FZ_STRUCTURE_REFERENCE:
482 return extract_struct_REFERENCE;
483 case FZ_STRUCTURE_BIBENTRY:
484 return extract_struct_BIBENTRY;
485 case FZ_STRUCTURE_CODE:
486 return extract_struct_CODE;
487 case FZ_STRUCTURE_LINK:
488 return extract_struct_LINK;
489 case FZ_STRUCTURE_ANNOT:
490 return extract_struct_ANNOT;
491 /* Inline elements (PDF 2.0 - Table 368) */
492 case FZ_STRUCTURE_EM:
493 return extract_struct_EM;
494 case FZ_STRUCTURE_STRONG:
495 return extract_struct_STRONG;
496
497 /* Ruby inline element (PDF 1.7 - Table 10.26) */
498 case FZ_STRUCTURE_RUBY:
499 return extract_struct_RUBY;
500 case FZ_STRUCTURE_RB:
501 return extract_struct_RB;
502 case FZ_STRUCTURE_RT:
503 return extract_struct_RT;
504 case FZ_STRUCTURE_RP:
505 return extract_struct_RP;
506
507 /* Warichu inline element (PDF 1.7 - Table 10.26) */
508 case FZ_STRUCTURE_WARICHU:
509 return extract_struct_WARICHU;
510 case FZ_STRUCTURE_WT:
511 return extract_struct_WT;
512 case FZ_STRUCTURE_WP:
513 return extract_struct_WP;
514
515 /* Illustration elements (PDF 1.7 - Table 10.27) */
516 case FZ_STRUCTURE_FIGURE:
517 return extract_struct_FIGURE;
518 case FZ_STRUCTURE_FORMULA:
519 return extract_struct_FORMULA;
520 case FZ_STRUCTURE_FORM:
521 return extract_struct_FORM;
522
523 /* Artifact structure type (PDF 2.0 - Table 375) */
524 case FZ_STRUCTURE_ARTIFACT:
525 return extract_struct_ARTIFACT;
526 }
527 }
528
529 static void
530 dev_begin_structure(fz_context *ctx, fz_device *dev_, fz_structure standard, const char *raw, int idx)
531 {
532 fz_docx_device *dev = (fz_docx_device *)dev_;
533 extract_t *extract = dev->writer->extract;
534
535 assert(!dev->writer->ctx);
536 dev->writer->ctx = ctx;
537 fz_try(ctx)
538 {
539 if (extract_begin_struct(extract, fz_struct_to_extract(standard), idx, -1))
540 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin struct");
541 }
542 fz_always(ctx)
543 dev->writer->ctx = NULL;
544 fz_catch(ctx)
545 fz_rethrow(ctx);
546 }
547
548 static void
549 dev_end_structure(fz_context *ctx, fz_device *dev_)
550 {
551 fz_docx_device *dev = (fz_docx_device *)dev_;
552 extract_t *extract = dev->writer->extract;
553
554 assert(!dev->writer->ctx);
555 dev->writer->ctx = ctx;
556 fz_try(ctx)
557 {
558 if (extract_end_struct(extract))
559 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end struct");
560 }
561 fz_always(ctx)
562 dev->writer->ctx = NULL;
563 fz_catch(ctx)
564 fz_rethrow(ctx);
565 }
566
567
568 static fz_device *writer_begin_page(fz_context *ctx, fz_document_writer *writer_, fz_rect mediabox)
569 {
570 fz_docx_writer *writer = (fz_docx_writer*) writer_;
571 fz_docx_device *dev;
572 assert(!writer->ctx);
573 writer->ctx = ctx;
574 writer->mediabox = mediabox;
575 fz_var(dev);
576 fz_try(ctx)
577 {
578 if (extract_page_begin(writer->extract, mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1))
579 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin page");
580 dev = fz_new_derived_device(ctx, fz_docx_device);
581 dev->super.fill_text = dev_fill_text;
582 dev->super.stroke_text = dev_stroke_text;
583 dev->super.clip_text = dev_clip_text;
584 dev->super.clip_stroke_text = dev_clip_stroke_text;
585 dev->super.ignore_text = dev_ignore_text;
586 dev->super.fill_image = dev_fill_image;
587 dev->super.fill_path = dev_fill_path;
588 dev->super.stroke_path = dev_stroke_path;
589 dev->super.begin_structure = dev_begin_structure;
590 dev->super.end_structure = dev_end_structure;
591 dev->writer = writer;
592 }
593 fz_always(ctx)
594 {
595 writer->ctx = NULL;
596 }
597 fz_catch(ctx)
598 {
599 fz_rethrow(ctx);
600 }
601 return &dev->super;
602 }
603
604 static void writer_end_page(fz_context *ctx, fz_document_writer *writer_, fz_device *dev)
605 {
606 fz_docx_writer *writer = (fz_docx_writer*) writer_;
607 assert(!writer->ctx);
608 writer->ctx = ctx;
609 fz_try(ctx)
610 {
611 fz_close_device(ctx, dev);
612 if (extract_page_end(writer->extract))
613 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end page");
614
615 if (extract_process(writer->extract, writer->spacing, writer->rotation, writer->images))
616 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to process page");
617 }
618 fz_always(ctx)
619 {
620 writer->ctx = NULL;
621 fz_drop_device(ctx, dev);
622 }
623 fz_catch(ctx)
624 {
625 fz_rethrow(ctx);
626 }
627 }
628
629 static int buffer_write(void *handle, const void *source, size_t numbytes, size_t *o_actual)
630 /*
631 * extract_buffer_t callback that calls fz_write_data(). <source> will be docx
632 * archive data.
633 */
634 {
635 int e = 0;
636 fz_docx_writer *writer = handle;
637 fz_var(e);
638 fz_try(writer->ctx)
639 {
640 fz_write_data(writer->ctx, writer->output, source, numbytes);
641 *o_actual = numbytes;
642 }
643 fz_catch(writer->ctx)
644 {
645 errno = EIO;
646 e = -1;
647 }
648 return e;
649 }
650
651 static int buffer_cache(void *handle, void **o_cache, size_t *o_numbytes)
652 /*
653 * extract_buffer_t cache function. We simply return writer->output_cache.
654 */
655 {
656 fz_docx_writer *writer = handle;
657 *o_cache = writer->output_cache;
658 *o_numbytes = sizeof(writer->output_cache);
659 return 0;
660 }
661
/*
 * close_writer callback: generates the output document.
 *
 * Opens an extract_buffer_t whose write and cache callbacks are
 * buffer_write() and buffer_cache(), asks Extract to write the document
 * into it, closes the buffer, ends the Extract instance and finally closes
 * the fz_output.
 *
 * Throws FZ_ERROR_LIBRARY if an Extract call reports failure; anything
 * thrown by fz_close_output() is passed on as well. On error the Extract
 * buffer and instance are still released here, but the output is not
 * closed.
 */
static void writer_close(fz_context *ctx, fz_document_writer *writer_)
{
	fz_docx_writer *writer = (fz_docx_writer*) writer_;
	extract_buffer_t *extract_buffer_output = NULL;

	fz_var(extract_buffer_output);
	fz_var(writer);
	assert(!writer->ctx);
	writer->ctx = ctx;
	fz_try(ctx)
	{
		/*
		 * Write docx to writer->output. Need to create an
		 * extract_buffer_t that writes to writer->output, for use by
		 * extract_write().
		 */
		if (extract_buffer_open(
				writer->alloc,
				writer,
				NULL /*fn_read*/,
				buffer_write,
				buffer_cache,
				NULL /*fn_close*/,
				&extract_buffer_output
				))
		{
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract_buffer_output: %s", strerror(errno));
		}
		if (extract_write(writer->extract, extract_buffer_output))
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to generate docx content: %s", strerror(errno));
		if (extract_buffer_close(&extract_buffer_output))
			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to close extract_buffer: %s", strerror(errno));

		extract_end(&writer->extract);
		fz_close_output(ctx, writer->output);
		/* .ctx is cleared only after the last Extract call has been made. */
		writer->ctx = NULL;
	}
	fz_catch(ctx)
	{
		/*
		 * We don't call fz_close_output() because it can throw and in
		 * this error case we can safely leave cleanup to our s_drop()
		 * function's calls to fz_drop_output().
		 */
		/* .ctx is still set here, so it is available to the calls below. */
		extract_buffer_close(&extract_buffer_output);
		extract_end(&writer->extract);
		writer->ctx = NULL;
		fz_rethrow(ctx);
	}
}
712
713 static void writer_drop(fz_context *ctx, fz_document_writer *writer_)
714 {
715 fz_docx_writer *writer = (fz_docx_writer*) writer_;
716 fz_drop_output(ctx, writer->output);
717 writer->output = NULL;
718 assert(!writer->ctx);
719 writer->ctx = ctx;
720 extract_end(&writer->extract);
721 extract_alloc_destroy(&writer->alloc);
722 writer->ctx = NULL;
723 }
724
725
726 static int get_bool_option(fz_context *ctx, const char *options, const char *name, int default_)
727 {
728 const char *value;
729 if (fz_has_option(ctx, options, name, &value))
730 {
731 if (fz_option_eq(value, "yes")) return 1;
732 if (fz_option_eq(value, "no")) return 0;
733 else fz_throw(ctx, FZ_ERROR_SYNTAX, "option '%s' should be yes or no in options='%s'", name, options);
734 }
735 else
736 return default_;
737 }
738
739 static double get_double_option(fz_context *ctx, const char *options, const char *name, double default_)
740 {
741 const char *value;
742 if (fz_has_option(ctx, options, name, &value))
743 {
744 double ret = atof(value);
745 return ret;
746 }
747 else
748 return default_;
749 }
750
751 static void *s_realloc_fn(void *state, void *prev, size_t size)
752 {
753 fz_docx_writer *writer = state;
754 assert(writer);
755 assert(writer->ctx);
756 return fz_realloc_no_throw(writer->ctx, prev, size);
757 }
758
759 /* Will drop <out> if an error occurs. */
760 static fz_document_writer *fz_new_docx_writer_internal(fz_context *ctx, fz_output *out,
761 const char *options, extract_format_t format)
762 {
763 fz_docx_writer *writer = NULL;
764
765 fz_var(writer);
766
767 fz_try(ctx)
768 {
769 double space_guess = get_double_option(ctx, options, "space-guess", 0);
770 writer = fz_new_derived_document_writer(
771 ctx,
772 fz_docx_writer,
773 writer_begin_page,
774 writer_end_page,
775 writer_close,
776 writer_drop
777 );
778 writer->ctx = ctx;
779 writer->output = out;
780 if (get_bool_option(ctx, options, "html", 0)) format = extract_format_HTML;
781 if (get_bool_option(ctx, options, "text", 0)) format = extract_format_TEXT;
782 if (get_bool_option(ctx, options, "json", 0)) format = extract_format_JSON;
783 if (extract_alloc_create(s_realloc_fn, writer, &writer->alloc))
784 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract_alloc instance");
785 if (extract_begin(writer->alloc, format, &writer->extract))
786 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract instance");
787 if (space_guess)
788 extract_set_space_guess(writer->extract, space_guess);
789 writer->spacing = get_bool_option(ctx, options, "spacing", 0);
790 writer->rotation = get_bool_option(ctx, options, "rotation", 1);
791 writer->images = get_bool_option(ctx, options, "images", 1);
792 writer->mediabox_clip = get_bool_option(ctx, options, "mediabox-clip", 1);
793 if (extract_set_layout_analysis(writer->extract, get_bool_option(ctx, options, "analyse", 0)))
794 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_enable_analysis failed.");
795 {
796 const char* v;
797 if (fz_has_option(ctx, options, "tables-csv-format", &v))
798 {
799 size_t len = strlen(v) + 1; /* Might include trailing options. */
800 char* formatbuf = fz_malloc(ctx, len);
801 fz_copy_option(ctx, v, formatbuf, len);
802 fprintf(stderr, "tables-csv-format: %s\n", formatbuf);
803 if (extract_tables_csv_format(writer->extract, formatbuf))
804 {
805 fz_free(ctx, formatbuf);
806 fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_tables_csv_format() failed.");
807 }
808 fz_free(ctx, formatbuf);
809 }
810 }
811 writer->ctx = NULL;
812 }
813 fz_catch(ctx)
814 {
815 /* fz_drop_document_writer() drops its output so we only need to call
816 fz_drop_output() if we failed before creating the writer. */
817 if (writer)
818 {
819 writer->ctx = ctx;
820 fz_drop_document_writer(ctx, &writer->super);
821 writer->ctx = NULL;
822 }
823 else
824 fz_drop_output(ctx, out);
825 fz_rethrow(ctx);
826 }
827 return &writer->super;
828 }
829
830 fz_document_writer *fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
831 {
832 return fz_new_docx_writer_internal(ctx, out, options, extract_format_DOCX);
833 }
834
835 fz_document_writer *fz_new_docx_writer(fz_context *ctx, const char *path, const char *options)
836 {
837 /* No need to drop <out> if fz_new_docx_writer_internal() throws, because
838 it always drops <out> if it fails. */
839 fz_output *out = fz_new_output_with_path(ctx, path, 0 /*append*/);
840 return fz_new_docx_writer_internal(ctx, out, options, extract_format_DOCX);
841 }
842
843 #if FZ_ENABLE_ODT_OUTPUT
844
845 fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
846 {
847 return fz_new_docx_writer_internal(ctx, out, options, extract_format_ODT);
848 }
849
850 fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
851 {
852 /* No need to drop <out> if fz_new_docx_writer_internal() throws, because
853 it always drops <out> if it fails. */
854 fz_output *out = fz_new_output_with_path(ctx, path, 0 /*append*/);
855 return fz_new_docx_writer_internal(ctx, out, options, extract_format_ODT);
856 }
857
858 #else
859
860 fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
861 {
862 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "ODT writer not enabled");
863 return NULL;
864 }
865
866 fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
867 {
868 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "ODT writer not enabled");
869 return NULL;
870 }
871
872 #endif
873
874 #else
875
876 fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
877 {
878 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX/ODT writer not enabled");
879 return NULL;
880 }
881
882 fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
883 {
884 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX/ODT writer not enabled");
885 return NULL;
886 }
887
888 fz_document_writer *fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
889 {
890 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX writer not enabled");
891 return NULL;
892 }
893
894 fz_document_writer *fz_new_docx_writer(fz_context *ctx, const char *path, const char *options)
895 {
896 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX writer not enabled");
897 return NULL;
898 }
899
900 #endif