Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/extract/src/odt.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* These extract_odt_*() functions generate odt content and odt zip archive | |
| 2 data. | |
| 3 | |
| 4 Caller must call things in a sensible order to create valid content - | |
| 5 e.g. don't call odt_paragraph_start() twice without intervening call to | |
| 6 odt_paragraph_finish(). */ | |
| 7 | |
| 8 #include "extract/extract.h" | |
| 9 | |
| 10 #include "odt_template.h" | |
| 11 | |
| 12 #include "astring.h" | |
| 13 #include "document.h" | |
| 14 #include "odt.h" | |
| 15 #include "mem.h" | |
| 16 #include "memento.h" | |
| 17 #include "outf.h" | |
| 18 #include "sys.h" | |
| 19 #include "text.h" | |
| 20 #include "zip.h" | |
| 21 | |
| 22 #include <assert.h> | |
| 23 #include <errno.h> | |
| 24 #include <float.h> | |
| 25 #include <math.h> | |
| 26 #include <stdlib.h> | |
| 27 #include <stdio.h> | |
| 28 #include <string.h> | |
| 29 | |
| 30 #include <sys/stat.h> | |
| 31 | |
| 32 | |
| 33 static int | |
| 34 odt_paragraph_start( | |
| 35 extract_alloc_t *alloc, | |
| 36 extract_astring_t *output) | |
| 37 { | |
| 38 return extract_astring_cat(alloc, output, "\n\n<text:p>"); | |
| 39 } | |
| 40 | |
| 41 static int | |
| 42 odt_paragraph_finish( | |
| 43 extract_alloc_t *alloc, | |
| 44 extract_astring_t *output) | |
| 45 { | |
| 46 return extract_astring_cat(alloc, output, "</text:p>"); | |
| 47 } | |
| 48 | |
| 49 /* ODT doesn't seem to support ad-hoc inline font specifications; instead | |
| 50 we have to define a style at the start of the content.xml file. So when | |
| 51 writing content we insert a style name and add the required styles to a | |
| 52 extract_odt_styles_t struct. */ | |
| 53 | |
| 54 struct extract_odt_style_t | |
| 55 { | |
| 56 int id; /* A unique id for this style. */ | |
| 57 font_t font; | |
| 58 }; | |
| 59 | |
| 60 struct extract_odt_styles_t | |
| 61 { | |
| 62 /* Styles are stored sorted. */ | |
| 63 extract_odt_style_t *styles; | |
| 64 int styles_num; | |
| 65 }; | |
| 66 | |
| 67 static int | |
| 68 odt_style_compare( | |
| 69 extract_odt_style_t *a, | |
| 70 extract_odt_style_t *b) | |
| 71 { | |
| 72 int d; | |
| 73 double dd; | |
| 74 | |
| 75 if ((d = strcmp(a->font.name, b->font.name))) return d; | |
| 76 if ((dd = a->font.size - b->font.size) != 0.0) return (dd > 0.0) ? 1 : -1; | |
| 77 if ((d = a->font.bold - b->font.bold)) return d; | |
| 78 if ((d = a->font.italic - b->font.italic)) return d; | |
| 79 | |
| 80 return 0; | |
| 81 } | |
| 82 | |
| 83 static int | |
| 84 odt_style_append_definition( | |
| 85 extract_alloc_t *alloc, | |
| 86 extract_odt_style_t *style, | |
| 87 extract_astring_t *text) | |
| 88 { | |
| 89 const char* font_name = style->font.name; | |
| 90 | |
| 91 /* This improves output e.g. for zlib.3.pdf, but clearly a hack. */ | |
| 92 if (0 && strstr(font_name, "Helvetica")) | |
| 93 { | |
| 94 font_name = "Liberation Sans"; | |
| 95 } | |
| 96 outf("style->font_name=%s font_name=%s", style->font.name, font_name); | |
| 97 if (extract_astring_catf(alloc, text, "<style:style style:name=\"T%i\" style:family=\"text\">", style->id)) return -1; | |
| 98 if (extract_astring_catf(alloc, text, "<style:text-properties style:font-name=\"%s\"", font_name)) return -1; | |
| 99 if (extract_astring_catf(alloc, text, " fo:font-size=\"%.2fpt\"", style->font.size)) return -1; | |
| 100 if (extract_astring_catf(alloc, text, " fo:font-weight=\"%s\"", style->font.bold ? "bold" : "normal")) return -1; | |
| 101 if (extract_astring_catf(alloc, text, " fo:font-style=\"%s\"", style->font.italic ? "italic" : "normal")) return -1; | |
| 102 if (extract_astring_cat(alloc, text, " /></style:style>")) return -1; | |
| 103 | |
| 104 return 0; | |
| 105 } | |
| 106 | |
| 107 void | |
| 108 extract_odt_styles_free( | |
| 109 extract_alloc_t *alloc, | |
| 110 extract_odt_styles_t *styles) | |
| 111 { | |
| 112 int i; | |
| 113 | |
| 114 for (i=0; i<styles->styles_num; ++i) | |
| 115 { | |
| 116 extract_odt_style_t* style = &styles->styles[i]; | |
| 117 extract_free(alloc, &style->font.name); | |
| 118 } | |
| 119 extract_free(alloc, &styles->styles); | |
| 120 } | |
| 121 | |
| 122 static int | |
| 123 odt_styles_definitions( | |
| 124 extract_alloc_t *alloc, | |
| 125 extract_odt_styles_t *styles, | |
| 126 extract_astring_t *out) | |
| 127 { | |
| 128 int i; | |
| 129 | |
| 130 if (extract_astring_cat(alloc, out, "<office:automatic-styles>")) return -1; | |
| 131 for (i=0; i<styles->styles_num; ++i) | |
| 132 { | |
| 133 if (odt_style_append_definition(alloc, &styles->styles[i], out)) return -1; | |
| 134 } | |
| 135 extract_astring_cat(alloc, out, "<style:style style:name=\"gr1\" style:family=\"graphic\">\n"); | |
| 136 extract_astring_cat(alloc, out, "<style:graphic-properties" | |
| 137 " draw:stroke=\"none\"" | |
| 138 " svg:stroke-color=\"#000000\"" | |
| 139 " draw:fill=\"none\"" | |
| 140 " draw:fill-color=\"#ffffff\"" | |
| 141 " fo:min-height=\"1.9898in\"" | |
| 142 " style:run-through=\"foreground\"" | |
| 143 " style:wrap=\"run-through\"" | |
| 144 " style:number-wrapped-paragraphs=\"no-limit\"" | |
| 145 " style:vertical-pos=\"from-top\"" | |
| 146 " style:vertical-rel=\"paragraph\"" | |
| 147 " style:horizontal-pos=\"from-left\"" | |
| 148 " style:horizontal-rel=\"paragraph\"" | |
| 149 " />\n" | |
| 150 ); | |
| 151 extract_astring_cat(alloc, out, "<style:paragraph-properties style:writing-mode=\"lr-tb\"/>\n"); | |
| 152 extract_astring_cat(alloc, out, "</style:style>\n"); | |
| 153 | |
| 154 /* Style for images. */ | |
| 155 extract_astring_cat(alloc, out, "<style:style style:name=\"fr1\" style:family=\"graphic\" style:parent-style-name=\"Graphics\">\n"); | |
| 156 extract_astring_cat(alloc, out, "<style:graphic-properties" | |
| 157 " fo:margin-left=\"0in\"" | |
| 158 " fo:margin-right=\"0in\"" | |
| 159 " fo:margin-top=\"0in\"" | |
| 160 " fo:margin-bottom=\"0in\"" | |
| 161 " style:vertical-pos=\"top\"" | |
| 162 " style:vertical-rel=\"baseline\"" | |
| 163 " fo:background-color=\"transparent\"" | |
| 164 " draw:fill=\"none\"" | |
| 165 " draw:fill-color=\"#ffffff\"" | |
| 166 " fo:padding=\"0in\"" | |
| 167 " fo:border=\"none\"" | |
| 168 " style:mirror=\"none\"" | |
| 169 " fo:clip=\"rect(0in, 0in, 0in, 0in)\"" | |
| 170 " draw:luminance=\"0%\"" | |
| 171 " draw:contrast=\"0%\"" | |
| 172 " draw:red=\"0%\"" | |
| 173 " draw:green=\"0%\"" | |
| 174 " draw:blue=\"0%\"" | |
| 175 " draw:gamma=\"100%\"" | |
| 176 " draw:color-inversion=\"false\"" | |
| 177 " draw:image-opacity=\"100%\"" | |
| 178 " draw:color-mode=\"standard\"" | |
| 179 "/>\n"); | |
| 180 extract_astring_cat(alloc, out, "</style:style>\n"); | |
| 181 | |
| 182 if (extract_astring_cat(alloc, out, "</office:automatic-styles>")) return -1; | |
| 183 | |
| 184 return 0; | |
| 185 } | |
| 186 | |
| 187 /* Adds specified style to <styles> if not already present. Sets *o_style to | |
| 188 point to the style_t within <styles>. */ | |
| 189 static int | |
| 190 odt_styles_add( | |
| 191 extract_alloc_t *alloc, | |
| 192 extract_odt_styles_t *styles, | |
| 193 font_t *font, | |
| 194 extract_odt_style_t **o_style) | |
| 195 { | |
| 196 extract_odt_style_t style = {0 /*id*/, *font}; | |
| 197 int i; | |
| 198 | |
| 199 /* We keep styles->styles[] sorted; todo: use bsearch or similar when | |
| 200 searching. */ | |
| 201 for (i=0; i<styles->styles_num; ++i) | |
| 202 { | |
| 203 int d = odt_style_compare(&style, &styles->styles[i]); | |
| 204 if (d == 0) | |
| 205 { | |
| 206 *o_style = &styles->styles[i]; | |
| 207 return 0; | |
| 208 } | |
| 209 if (d > 0) break; | |
| 210 } | |
| 211 /* Insert at position <i>. */ | |
| 212 if (extract_realloc(alloc, &styles->styles, sizeof(styles->styles[0]) * (styles->styles_num+1))) return -1; | |
| 213 memmove(&styles->styles[i+1], &styles->styles[i], sizeof(styles->styles[0]) * (styles->styles_num - i)); | |
| 214 styles->styles_num += 1; | |
| 215 styles->styles[i].id = styles->styles_num + 10; /* Leave space for template's built-in styles. */ | |
| 216 if (extract_strdup(alloc, font->name, &styles->styles[i].font.name)) return -1; | |
| 217 styles->styles[i].font.size = font->size; | |
| 218 styles->styles[i].font.bold = font->bold; | |
| 219 styles->styles[i].font.italic = font->italic; | |
| 220 *o_style = &styles->styles[i]; | |
| 221 | |
| 222 return 0; | |
| 223 } | |
| 224 | |
| 225 /* Starts a new run. Caller must ensure that s_odt_run_finish() was | |
| 226 called to terminate any previous run. */ | |
| 227 static int | |
| 228 extract_odt_run_start( | |
| 229 extract_alloc_t *alloc, | |
| 230 extract_astring_t *content, | |
| 231 extract_odt_styles_t *styles, | |
| 232 content_state_t *content_state) | |
| 233 { | |
| 234 extract_odt_style_t* style; | |
| 235 | |
| 236 if (odt_styles_add(alloc, | |
| 237 styles, | |
| 238 &content_state->font, | |
| 239 &style)) return -1; | |
| 240 if (extract_astring_catf(alloc, content, "<text:span text:style-name=\"T%i\">", style->id)) return -1; | |
| 241 | |
| 242 return 0; | |
| 243 } | |
| 244 | |
| 245 static int | |
| 246 odt_run_finish( | |
| 247 extract_alloc_t *alloc, | |
| 248 content_state_t *content_state, | |
| 249 extract_astring_t *content) | |
| 250 { | |
| 251 if (content_state) | |
| 252 content_state->font.name = NULL; | |
| 253 return extract_astring_cat(alloc, content, "</text:span>"); | |
| 254 } | |
| 255 | |
| 256 /* Append an empty paragraph to *content. */ | |
| 257 static int | |
| 258 odt_append_empty_paragraph( | |
| 259 extract_alloc_t *alloc, | |
| 260 extract_astring_t *content, | |
| 261 extract_odt_styles_t *styles) | |
| 262 { | |
| 263 int e = -1; | |
| 264 static char fontname[] = "OpenSans"; | |
| 265 content_state_t content_state = {0}; | |
| 266 | |
| 267 if (odt_paragraph_start(alloc, content)) goto end; | |
| 268 /* [This comment is from docx, haven't checked odt.] It seems like our | |
| 269 choice of font size here doesn't make any difference to the amount of | |
| 270 vertical space, unless we include a non-space character. Presumably | |
| 271 something to do with the styles in the template document. */ | |
| 272 content_state.font.name = fontname; | |
| 273 content_state.font.size = 10; | |
| 274 content_state.font.bold = 0; | |
| 275 content_state.font.italic = 0; | |
| 276 if (extract_odt_run_start(alloc, content, styles, &content_state)) goto end; | |
| 277 //docx_char_append_string(content, " "); /*   is non-break space. */ | |
| 278 if (odt_run_finish(alloc, NULL /*content_state*/, content)) goto end; | |
| 279 if (odt_paragraph_finish(alloc, content)) goto end; | |
| 280 e = 0; | |
| 281 | |
| 282 end: | |
| 283 return e; | |
| 284 } | |
| 285 | |
| 286 | |
| 287 /* Append odt xml for <paragraph> to <content>. Updates *content_state if we | |
| 288 change font. */ | |
| 289 static int | |
| 290 document_to_odt_content_paragraph( | |
| 291 extract_alloc_t *alloc, | |
| 292 content_state_t *content_state, | |
| 293 paragraph_t *paragraph, | |
| 294 extract_astring_t *content, | |
| 295 extract_odt_styles_t *styles) | |
| 296 { | |
| 297 int e = -1; | |
| 298 content_line_iterator lit; | |
| 299 line_t *line; | |
| 300 | |
| 301 if (odt_paragraph_start(alloc, content)) goto end; | |
| 302 | |
| 303 /* Output justification. */ | |
| 304 if ((paragraph->line_flags & paragraph_not_fully_justified) == 0) | |
| 305 { | |
| 306 if (extract_astring_cat(alloc, content, "<w:pPr><w:jc w:val=\"both\"/></w:pPr>")) goto end; | |
| 307 } | |
| 308 else if ((paragraph->line_flags & paragraph_not_centred) == 0) | |
| 309 { | |
| 310 if (extract_astring_cat(alloc, content, "<w:pPr><w:jc w:val=\"center\"/></w:pPr>")) goto end; | |
| 311 } | |
| 312 else if ((paragraph->line_flags & (paragraph_not_aligned_left | paragraph_not_aligned_right)) == paragraph_not_aligned_left) | |
| 313 { | |
| 314 if (extract_astring_cat(alloc, content, "<w:pPr><w:jc w:val=\"right\"/></w:pPr>")) goto end; | |
| 315 } | |
| 316 else if ((paragraph->line_flags & (paragraph_not_aligned_left | paragraph_not_aligned_right)) == paragraph_not_aligned_right) | |
| 317 { | |
| 318 if (extract_astring_cat(alloc, content, "<w:pPr><w:jc w:val=\"left\"/></w:pPr>")) goto end; | |
| 319 } | |
| 320 | |
| 321 | |
| 322 for (line = content_line_iterator_init(&lit, ¶graph->content); line != NULL; line = content_line_iterator_next(&lit)) | |
| 323 { | |
| 324 content_span_iterator sit; | |
| 325 span_t *span; | |
| 326 | |
| 327 for (span = content_span_iterator_init(&sit, &line->content); span != NULL; span = content_span_iterator_next(&sit)) | |
| 328 { | |
| 329 int si; | |
| 330 double font_size_new; | |
| 331 | |
| 332 content_state->ctm_prev = &span->ctm; | |
| 333 font_size_new = extract_font_size(&span->ctm); | |
| 334 if (!content_state->font.name | |
| 335 || strcmp(span->font_name, content_state->font.name) | |
| 336 || span->flags.font_bold != content_state->font.bold | |
| 337 || span->flags.font_italic != content_state->font.italic | |
| 338 || font_size_new != content_state->font.size | |
| 339 ) | |
| 340 { | |
| 341 if (content_state->font.name) | |
| 342 { | |
| 343 if (odt_run_finish(alloc, content_state, content)) goto end; | |
| 344 } | |
| 345 content_state->font.name = span->font_name; | |
| 346 content_state->font.bold = span->flags.font_bold; | |
| 347 content_state->font.italic = span->flags.font_italic; | |
| 348 content_state->font.size = font_size_new; | |
| 349 if (extract_odt_run_start( alloc, content, styles, content_state)) goto end; | |
| 350 } | |
| 351 | |
| 352 for (si=0; si<span->chars_num; ++si) | |
| 353 { | |
| 354 char_t* char_ = &span->chars[si]; | |
| 355 int c = char_->ucs; | |
| 356 if (extract_astring_catc_unicode_xml(alloc, content, c)) goto end; | |
| 357 } | |
| 358 /* Remove any trailing '-' at end of line. */ | |
| 359 if (extract_astring_char_truncate_if(content, '-')) goto end; | |
| 360 } | |
| 361 if (paragraph->line_flags & paragraph_breaks_strangely) | |
| 362 { | |
| 363 if (extract_astring_cat(alloc, content, "<w:br/>")) goto end; | |
| 364 } | |
| 365 } | |
| 366 if (content_state->font.name) | |
| 367 { | |
| 368 if (odt_run_finish(alloc, content_state, content)) goto end; | |
| 369 } | |
| 370 if (odt_paragraph_finish(alloc, content)) goto end; | |
| 371 | |
| 372 e = 0; | |
| 373 | |
| 374 end: | |
| 375 return e; | |
| 376 } | |
| 377 | |
| 378 /* Write reference to image into odt content. */ | |
| 379 static int | |
| 380 odt_append_image( | |
| 381 extract_alloc_t *alloc, | |
| 382 extract_astring_t *output, | |
| 383 image_t *image) | |
| 384 { | |
| 385 extract_astring_cat(alloc, output, "\n"); | |
| 386 extract_astring_cat(alloc, output, "<text:p text:style-name=\"Standard\">\n"); | |
| 387 extract_astring_catf(alloc, output, "<draw:frame draw:style-name=\"fr1\" draw:name=\"Picture %s\" text:anchor-type=\"as-char\" svg:width=\"%fin\" svg:height=\"%fin\" draw:z-index=\"0\">\n", | |
| 388 image->id, | |
| 389 image->w / 72.0, | |
| 390 image->h / 72.0); | |
| 391 extract_astring_catf(alloc, output, "<draw:image xlink:href=\"Pictures/%s\" xlink:type=\"simple\" xlink:show=\"embed\" xlink:actuate=\"onLoad\" draw:mime-type=\"image/%s\"/>\n", | |
| 392 image->name, | |
| 393 image->type); | |
| 394 extract_astring_cat(alloc, output, "</draw:frame>\n"); | |
| 395 extract_astring_cat(alloc, output, "</text:p>\n"); | |
| 396 | |
| 397 return 0; | |
| 398 } | |
| 399 | |
| 400 | |
| 401 /* Writes paragraph to content inside rotated text box. */ | |
| 402 static int | |
| 403 odt_output_rotated_paragraphs( | |
| 404 extract_alloc_t *alloc, | |
| 405 block_t *block, | |
| 406 double rotation_rad, | |
| 407 double x_pt, | |
| 408 double y_pt, | |
| 409 double w_pt, | |
| 410 double h_pt, | |
| 411 int text_box_id, | |
| 412 extract_astring_t *content, | |
| 413 extract_odt_styles_t *styles, | |
| 414 content_state_t *content_state) | |
| 415 { | |
| 416 int e = 0; | |
| 417 paragraph_t *paragraph; | |
| 418 content_paragraph_iterator pit; | |
| 419 double pt_to_inch = 1/72.0; | |
| 420 | |
| 421 outf("rotated paragraphs: rotation_rad=%f (x y)=(%f %f) (w h)=(%f %f)", rotation_rad, x_pt, y_pt, w_pt, h_pt); | |
| 422 | |
| 423 // https://docs.oasis-open.org/office/OpenDocument/v1.3/cs02/part3-schema/OpenDocument-v1.3-cs02-part3-schema.html#attribute-draw_transform | |
| 424 // says rotation is in degrees, but we seem to require -radians. | |
| 425 // | |
| 426 | |
| 427 if (!e) e = extract_astring_cat(alloc, content, "\n"); | |
| 428 | |
| 429 if (!e) e = extract_astring_cat(alloc, content, "<text:p text:style-name=\"Standard\">\n"); | |
| 430 if (!e) e = extract_astring_catf(alloc, content, "<draw:frame" | |
| 431 " text:anchor-type=\"paragraph\"" | |
| 432 " draw:z-index=\"5\"" | |
| 433 " draw:name=\"Shape%i\"" | |
| 434 " draw:style-name=\"gr1\"" | |
| 435 " draw:text-style-name=\"Standard\"" | |
| 436 " svg:width=\"%fin\"" | |
| 437 " svg:height=\"%fin\"" | |
| 438 " draw:transform=\"rotate (%f) translate (%fin %fin)\"" | |
| 439 ">\n" | |
| 440 , | |
| 441 text_box_id, | |
| 442 w_pt * pt_to_inch, | |
| 443 h_pt * pt_to_inch, | |
| 444 -rotation_rad, | |
| 445 x_pt * pt_to_inch, | |
| 446 y_pt * pt_to_inch | |
| 447 ); | |
| 448 if (!e) e = extract_astring_cat(alloc, content, "<draw:text-box>\n"); | |
| 449 | |
| 450 for (paragraph = content_paragraph_iterator_init(&pit, &block->content); paragraph != NULL; paragraph = content_paragraph_iterator_next(&pit)) | |
| 451 if (!e) e = document_to_odt_content_paragraph(alloc, content_state, paragraph, content, styles); | |
| 452 | |
| 453 if (!e) e = extract_astring_cat(alloc, content, "\n"); | |
| 454 if (!e) e = extract_astring_cat(alloc, content, "</draw:text-box>\n"); | |
| 455 if (!e) e = extract_astring_cat(alloc, content, "</draw:frame>\n"); | |
| 456 | |
| 457 if (!e) e = extract_astring_cat(alloc, content, "</text:p>\n"); | |
| 458 | |
| 459 return e; | |
| 460 } | |
| 461 | |
| 462 | |
| 463 static int | |
| 464 odt_append_table( | |
| 465 extract_alloc_t *alloc, | |
| 466 table_t *table, | |
| 467 extract_astring_t *output, | |
| 468 extract_odt_styles_t *styles) | |
| 469 { | |
| 470 int e = -1; | |
| 471 int y; | |
| 472 | |
| 473 { | |
| 474 int x; | |
| 475 static int table_number = 0; | |
| 476 table_number += 1; | |
| 477 if (extract_astring_catf(alloc, output, | |
| 478 "\n" | |
| 479 " <table:table text:style-name=\"extract.table\" table:name=\"extract.table.%i\">\n" | |
| 480 " <table:table-columns>\n" | |
| 481 , | |
| 482 table_number | |
| 483 )) goto end; | |
| 484 | |
| 485 for (x=0; x<table->cells_num_x; ++x) | |
| 486 { | |
| 487 if (extract_astring_cat(alloc, output, | |
| 488 " <table:table-column table:style-name=\"extract.table.column\"/>\n" | |
| 489 )) goto end; | |
| 490 } | |
| 491 if (extract_astring_cat(alloc, output, | |
| 492 " </table:table-columns>\n" | |
| 493 )) goto end; | |
| 494 } | |
| 495 for (y=0; y<table->cells_num_y; ++y) | |
| 496 { | |
| 497 int x; | |
| 498 if (extract_astring_cat(alloc, output, | |
| 499 " <table:table-row>\n" | |
| 500 )) goto end; | |
| 501 | |
| 502 for (x=0; x<table->cells_num_x; ++x) | |
| 503 { | |
| 504 cell_t *cell = table->cells[y*table->cells_num_x + x]; | |
| 505 content_paragraph_iterator pit; | |
| 506 paragraph_t *paragraph; | |
| 507 content_state_t content_state; | |
| 508 | |
| 509 if (!cell->above || !cell->left) | |
| 510 { | |
| 511 if (extract_astring_cat(alloc, output, " <table:covered-table-cell/>\n")) goto end; | |
| 512 continue; | |
| 513 } | |
| 514 | |
| 515 if (extract_astring_cat(alloc, output, " <table:table-cell")) goto end; | |
| 516 if (cell->extend_right > 1) | |
| 517 { | |
| 518 if (extract_astring_catf(alloc, output, " table:number-columns-spanned=\"%i\"", cell->extend_right)) goto end; | |
| 519 } | |
| 520 if (cell->extend_down > 1) | |
| 521 { | |
| 522 if (extract_astring_catf(alloc, output, " table:number-rows-spanned=\"%i\"", cell->extend_down)) goto end; | |
| 523 } | |
| 524 if (extract_astring_catf(alloc, output, ">\n")) goto end; | |
| 525 | |
| 526 /* Write contents of this cell. */ | |
| 527 content_state.font.name = NULL; | |
| 528 content_state.ctm_prev = NULL; | |
| 529 for (paragraph = content_paragraph_iterator_init(&pit, &cell->content); paragraph != NULL; paragraph = content_paragraph_iterator_next(&pit)) | |
| 530 if (document_to_odt_content_paragraph(alloc, &content_state, paragraph, output, styles)) goto end; | |
| 531 if (content_state.font.name) | |
| 532 if (odt_run_finish(alloc, &content_state, output)) goto end; | |
| 533 if (extract_astring_cat(alloc, output, "\n")) goto end; | |
| 534 if (extract_astring_cat(alloc, output, " </table:table-cell>\n")) goto end; | |
| 535 } | |
| 536 if (extract_astring_cat(alloc, output, " </table:table-row>\n")) goto end; | |
| 537 } | |
| 538 if (extract_astring_cat(alloc, output, " </table:table>\n")) goto end; | |
| 539 e = 0; | |
| 540 | |
| 541 end: | |
| 542 return e; | |
| 543 } | |
| 544 | |
| 545 | |
| 546 /* Appends paragraphs with same rotation, starting with subpage->paragraphs[*p] | |
| 547 and updates *p. */ | |
| 548 static int | |
| 549 odt_append_rotated_paragraphs( | |
| 550 extract_alloc_t *alloc, | |
| 551 content_state_t *content_state, | |
| 552 block_t *block, | |
| 553 int *text_box_id, | |
| 554 const matrix4_t *ctm, | |
| 555 double rotate, | |
| 556 extract_astring_t *output, | |
| 557 extract_odt_styles_t *styles) | |
| 558 { | |
| 559 /* Find extent of paragraphs with this same rotation. extent | |
| 560 will contain max width and max height of paragraphs, in units | |
| 561 before application of ctm, i.e. before rotation. */ | |
| 562 int e = -1; | |
| 563 point_t extent = {0, 0}; | |
| 564 content_iterator cit; | |
| 565 content_t *content; | |
| 566 paragraph_t *paragraph = content_first_paragraph(&block->content); | |
| 567 | |
| 568 /* We assume that first span is at origin of text | |
| 569 * block. This assumes left-to-right text. */ | |
| 570 span_t *first_span = content_first_span(&content_first_line(¶graph->content)->content); | |
| 571 point_t origin = { first_span->chars[0].x, | |
| 572 first_span->chars[0].y }; | |
| 573 matrix_t ctm_inverse = {1, 0, 0, 1, 0, 0}; | |
| 574 double ctm_det = ctm->a*ctm->d - ctm->b*ctm->c; | |
| 575 | |
| 576 outf("rotate=%.2frad=%.1fdeg ctm: origin=(%f %f) abcd=(%f %f %f %f)", | |
| 577 rotate, rotate * 180 / pi, | |
| 578 origin.x, | |
| 579 origin.y, | |
| 580 ctm->a, | |
| 581 ctm->b, | |
| 582 ctm->c, | |
| 583 ctm->d | |
| 584 ); | |
| 585 | |
| 586 if (ctm_det != 0) | |
| 587 { | |
| 588 ctm_inverse.a = +ctm->d / ctm_det; | |
| 589 ctm_inverse.b = -ctm->b / ctm_det; | |
| 590 ctm_inverse.c = -ctm->c / ctm_det; | |
| 591 ctm_inverse.d = +ctm->a / ctm_det; | |
| 592 } | |
| 593 else | |
| 594 { | |
| 595 outf("cannot invert ctm=(%f %f %f %f)", | |
| 596 ctm->a, ctm->b, ctm->c, ctm->d); | |
| 597 } | |
| 598 | |
| 599 for (content = content_iterator_init(&cit, &block->content); content != NULL; content = content_iterator_next(&cit)) | |
| 600 { | |
| 601 content_line_iterator lit; | |
| 602 line_t *line; | |
| 603 paragraph_t *paragraph; | |
| 604 | |
| 605 assert(content->type == content_paragraph); | |
| 606 if (content->type != content_paragraph) | |
| 607 continue; /* This shouldn't happen for now! */ | |
| 608 | |
| 609 paragraph = (paragraph_t *)content; | |
| 610 | |
| 611 /* Update <extent>. */ | |
| 612 for (line = content_line_iterator_init(&lit, ¶graph->content); line != NULL; line = content_line_iterator_next(&lit)) | |
| 613 { | |
| 614 span_t *span = extract_line_span_last(line); | |
| 615 char_t *char_ = extract_span_char_last(span); | |
| 616 double adv = char_->adv * extract_font_size(&span->ctm); | |
| 617 double x = char_->x + adv * cos(rotate); | |
| 618 double y = char_->y + adv * sin(rotate); | |
| 619 | |
| 620 double dx = x - origin.x; | |
| 621 double dy = y - origin.y; | |
| 622 | |
| 623 /* Position relative to origin and before box rotation. */ | |
| 624 double xx = ctm_inverse.a * dx + ctm_inverse.b * dy; | |
| 625 double yy = ctm_inverse.c * dx + ctm_inverse.d * dy; | |
| 626 yy = -yy; | |
| 627 if (xx > extent.x) extent.x = xx; | |
| 628 if (yy > extent.y) extent.y = yy; | |
| 629 if (0) outf("rotate=%f origin=(%f %f) xy=(%f %f) dxy=(%f %f) xxyy=(%f %f) span: %s", | |
| 630 rotate, origin.x, origin.y, x, y, dx, dy, xx, yy, extract_span_string(alloc, span)); | |
| 631 } | |
| 632 } | |
| 633 outf("rotate=%f extent is: (%f %f)", | |
| 634 rotate, extent.x, extent.y); | |
| 635 | |
| 636 /* All the paragraphs have same rotation. We output them into | |
| 637 * a single rotated text box. */ | |
| 638 | |
| 639 /* We need unique id for text box. */ | |
| 640 *text_box_id += 1; | |
| 641 | |
| 642 if (odt_output_rotated_paragraphs( | |
| 643 alloc, | |
| 644 block, | |
| 645 rotate, | |
| 646 origin.x, | |
| 647 origin.y, | |
| 648 extent.x, | |
| 649 extent.y, | |
| 650 *text_box_id, | |
| 651 output, | |
| 652 styles, | |
| 653 content_state)) goto end; | |
| 654 | |
| 655 e = 0; | |
| 656 end: | |
| 657 | |
| 658 return e; | |
| 659 } | |
| 660 | |
| 661 | |
| 662 static int | |
| 663 extract_page_to_odt_content( | |
| 664 extract_alloc_t *alloc, | |
| 665 extract_page_t *page, | |
| 666 int spacing, | |
| 667 int rotation, | |
| 668 int images, | |
| 669 extract_astring_t *output, | |
| 670 extract_odt_styles_t *styles) | |
| 671 { | |
| 672 int ret = -1; | |
| 673 int text_box_id = 0; | |
| 674 int c; | |
| 675 | |
| 676 /* Write paragraphs into <content>. */ | |
| 677 for (c=0; c<page->subpages_num; ++c) | |
| 678 { | |
| 679 subpage_t *subpage = page->subpages[c]; | |
| 680 content_iterator cit; | |
| 681 content_t *content; | |
| 682 content_table_iterator tit; | |
| 683 table_t *table; | |
| 684 content_state_t content_state; | |
| 685 content_state.font.name = NULL; | |
| 686 content_state.font.size = 0; | |
| 687 content_state.font.bold = 0; | |
| 688 content_state.font.italic = 0; | |
| 689 content_state.ctm_prev = NULL; | |
| 690 | |
| 691 content = content_iterator_init(&cit, &subpage->content); | |
| 692 table = content_table_iterator_init(&tit, &subpage->tables); | |
| 693 while (1) | |
| 694 { | |
| 695 double y_paragraph; | |
| 696 double y_table; | |
| 697 block_t *block = (content && content->type == content_block) ? (block_t *)content : NULL; | |
| 698 paragraph_t *paragraph = (content && content->type == content_paragraph) ? (paragraph_t *)content : (block ? content_first_paragraph(&block->content) : NULL); | |
| 699 line_t *first_line = paragraph ? content_first_line(¶graph->content) : NULL; | |
| 700 span_t *first_span = first_line ? content_first_span(&first_line->content) : NULL; | |
| 701 if (!paragraph && !table) break; | |
| 702 y_paragraph = (first_span) ? first_span->chars[0].y : DBL_MAX; | |
| 703 y_table = (table) ? table->pos.y : DBL_MAX; | |
| 704 | |
| 705 if (first_span && y_paragraph < y_table) | |
| 706 { | |
| 707 const matrix4_t *ctm = &first_span->ctm; | |
| 708 double rotate = atan2(ctm->b, ctm->a); | |
| 709 | |
| 710 if (spacing | |
| 711 && content_state.ctm_prev | |
| 712 && first_span | |
| 713 && extract_matrix4_cmp(content_state.ctm_prev, | |
| 714 &first_span->ctm) | |
| 715 ) | |
| 716 { | |
| 717 /* Extra vertical space between paragraphs that were at | |
| 718 different angles in the original document. */ | |
| 719 if (odt_append_empty_paragraph(alloc, output, styles)) goto end; | |
| 720 } | |
| 721 | |
| 722 if (spacing) | |
| 723 { | |
| 724 /* Extra vertical space between paragraphs. */ | |
| 725 if (odt_append_empty_paragraph(alloc, output, styles)) goto end; | |
| 726 } | |
| 727 | |
| 728 if (rotation && rotate != 0) | |
| 729 { | |
| 730 assert(block); | |
| 731 if (odt_append_rotated_paragraphs(alloc, &content_state, block, &text_box_id, ctm, rotate, output, styles)) goto end; | |
| 732 } | |
| 733 else if (block) | |
| 734 { | |
| 735 content_paragraph_iterator pit; | |
| 736 int first = 1; | |
| 737 | |
| 738 for (paragraph = content_paragraph_iterator_init(&pit, &block->content); paragraph != NULL; paragraph = content_paragraph_iterator_next(&pit)) | |
| 739 { | |
| 740 if (spacing && !first) | |
| 741 { | |
| 742 /* Extra vertical space between paragraphs. */ | |
| 743 if (odt_append_empty_paragraph(alloc, output, styles)) goto end; | |
| 744 } | |
| 745 first = 0; | |
| 746 | |
| 747 if (document_to_odt_content_paragraph(alloc, &content_state, paragraph, output, styles)) goto end; | |
| 748 } | |
| 749 } | |
| 750 else | |
| 751 { | |
| 752 if (document_to_odt_content_paragraph(alloc, &content_state, paragraph, output, styles)) goto end; | |
| 753 } | |
| 754 content = content_iterator_next(&cit); | |
| 755 } | |
| 756 else if (table) | |
| 757 { | |
| 758 if (odt_append_table(alloc, table, output, styles)) goto end; | |
| 759 table = content_table_iterator_next(&tit); | |
| 760 } | |
| 761 } | |
| 762 | |
| 763 outf("images=%i", images); | |
| 764 if (images) | |
| 765 { | |
| 766 content_t *images, *next; | |
| 767 outf("subpage->images_num=%i", content_count_images(&subpage->content)); | |
| 768 for (images = subpage->content.base.next; images != &subpage->content.base; images = next) | |
| 769 { | |
| 770 image_t *image = (image_t *)images; | |
| 771 next = images->next; | |
| 772 if (images->type != content_image) | |
| 773 continue; | |
| 774 odt_append_image(alloc, output, image); | |
| 775 } | |
| 776 } | |
| 777 } | |
| 778 ret = 0; | |
| 779 | |
| 780 end: | |
| 781 | |
| 782 return ret; | |
| 783 } | |
| 784 | |
| 785 int | |
| 786 extract_document_to_odt_content( | |
| 787 extract_alloc_t *alloc, | |
| 788 document_t *document, | |
| 789 int spacing, | |
| 790 int rotation, | |
| 791 int images, | |
| 792 extract_astring_t *content, | |
| 793 extract_odt_styles_t *styles) | |
| 794 { | |
| 795 int p; | |
| 796 int ret = 0; | |
| 797 | |
| 798 /* Write paragraphs into <content>. */ | |
| 799 for (p=0; p<document->pages_num; ++p) | |
| 800 { | |
| 801 extract_page_t *page = document->pages[p]; | |
| 802 | |
| 803 ret = extract_page_to_odt_content( | |
| 804 alloc, | |
| 805 page, | |
| 806 spacing, | |
| 807 rotation, | |
| 808 images, | |
| 809 content, | |
| 810 styles); | |
| 811 if (ret) break; | |
| 812 }; | |
| 813 | |
| 814 return ret; | |
| 815 } | |
| 816 | |
| 817 int | |
| 818 extract_odt_content_item( | |
| 819 extract_alloc_t *alloc, | |
| 820 extract_astring_t *contentss, | |
| 821 int contentss_num, | |
| 822 extract_odt_styles_t *styles, | |
| 823 images_t *images, | |
| 824 const char *name, | |
| 825 const char *text, | |
| 826 char **text2) | |
| 827 { | |
| 828 int e = -1; | |
| 829 extract_astring_t temp; | |
| 830 extract_astring_init(&temp); | |
| 831 *text2 = NULL; | |
| 832 | |
| 833 (void) images; | |
| 834 if (0) | |
| 835 {} | |
| 836 else if (!strcmp(name, "content.xml")) | |
| 837 { | |
| 838 /* Insert paragraphs content. */ | |
| 839 char* text_intermediate = NULL; | |
| 840 extract_astring_t styles_definitions = {0}; | |
| 841 | |
| 842 /* Insert content before '</office:text>'. */ | |
| 843 if (extract_content_insert( | |
| 844 alloc, | |
| 845 text, | |
| 846 NULL /*single*/, | |
| 847 NULL /*mid_begin_name*/, | |
| 848 "</office:text>" /*mid_end_name*/, | |
| 849 contentss, | |
| 850 contentss_num, | |
| 851 &text_intermediate | |
| 852 )) goto end; | |
| 853 outf("text_intermediate: %s", text_intermediate); | |
| 854 | |
| 855 /* Convert <styles> to text. */ | |
| 856 if (odt_styles_definitions(alloc, styles, &styles_definitions)) goto end; | |
| 857 | |
| 858 /* To make tables work, we seem to need to specify table and column | |
| 859 styles, and these can be empty. todo: maybe specify exact sizes based | |
| 860 on the pdf table and cell dimensions. */ | |
| 861 if (extract_astring_cat(alloc, &styles_definitions, | |
| 862 "\n" | |
| 863 "<style:style style:name=\"extract.table\" style:family=\"table\"/>\n" | |
| 864 "<style:style style:name=\"extract.table.column\" style:family=\"table-column\"/>\n" | |
| 865 )) goto end; | |
| 866 | |
| 867 /* Replace '<office:automatic-styles/>' with text from | |
| 868 <styles_definitions>. */ | |
| 869 e = extract_content_insert( | |
| 870 alloc, | |
| 871 text_intermediate, | |
| 872 "<office:automatic-styles/>" /*single*/, | |
| 873 NULL /*mid_begin_name*/, | |
| 874 NULL /*mid_end_name*/, | |
| 875 &styles_definitions, | |
| 876 1, | |
| 877 text2 | |
| 878 ); | |
| 879 outf("e=%i errno=%i", e, errno); | |
| 880 extract_free(alloc, &text_intermediate); | |
| 881 extract_astring_free(alloc, &styles_definitions); | |
| 882 outf("e=%i errno=%i", e, errno); | |
| 883 if (e) goto end; | |
| 884 } | |
| 885 else if (!strcmp(name, "META-INF/manifest.xml")) | |
| 886 { | |
| 887 /* Add images. */ | |
| 888 int e = 0; | |
| 889 int i; | |
| 890 for (i=0; i<images->images_num; ++i) | |
| 891 { | |
| 892 image_t* image = images->images[i]; | |
| 893 if (!e) e = extract_astring_catf(alloc, &temp, "<manifest:file-entry manifest:full-path=\"Pictures/%s\" manifest:media-type=\"image/%s\"/>\n", | |
| 894 image->name, | |
| 895 image->type | |
| 896 ); | |
| 897 } | |
| 898 if (!e) e = extract_content_insert( | |
| 899 alloc, | |
| 900 text, | |
| 901 NULL /*single*/, | |
| 902 NULL /*mid_begin_name*/, | |
| 903 "</manifest:manifest>" /*mid_end_name*/, | |
| 904 &temp, | |
| 905 1, | |
| 906 text2 | |
| 907 ); | |
| 908 if (e) goto end; | |
| 909 } | |
| 910 else | |
| 911 { | |
| 912 *text2 = NULL; | |
| 913 } | |
| 914 e = 0; | |
| 915 end: | |
| 916 outf("e=%i errno=%i text2=%s", e, errno, text2 ? *text2 : ""); | |
| 917 if (e) | |
| 918 { | |
| 919 /* We might have set <text2> to new content. */ | |
| 920 extract_free(alloc, text2); | |
| 921 /* We might have used <temp> as a temporary buffer. */ | |
| 922 } | |
| 923 extract_astring_free(alloc, &temp); | |
| 924 extract_astring_init(&temp); | |
| 925 return e; | |
| 926 } | |
| 927 | |
| 928 | |
| 929 | |
| 930 int | |
| 931 extract_odt_write_template( | |
| 932 extract_alloc_t *alloc, | |
| 933 extract_astring_t *contentss, | |
| 934 int contentss_num, | |
| 935 extract_odt_styles_t *styles, | |
| 936 images_t *images, | |
| 937 const char *path_template, | |
| 938 const char *path_out, | |
| 939 int preserve_dir) | |
| 940 { | |
| 941 int e = -1; | |
| 942 int i; | |
| 943 char* path_tempdir = NULL; | |
| 944 char* path = NULL; | |
| 945 char* text = NULL; | |
| 946 char* text2 = NULL; | |
| 947 | |
| 948 assert(path_out); | |
| 949 assert(path_template); | |
| 950 | |
| 951 if (extract_check_path_shell_safe(path_out)) | |
| 952 { | |
| 953 outf("path_out is unsafe: %s", path_out); | |
| 954 goto end; | |
| 955 } | |
| 956 | |
| 957 outf("images->images_num=%i", images->images_num); | |
| 958 if (extract_asprintf(alloc, &path_tempdir, "%s.dir", path_out) < 0) goto end; | |
| 959 if (extract_systemf(alloc, "rm -r '%s' 2>/dev/null", path_tempdir) < 0) goto end; | |
| 960 | |
| 961 if (extract_mkdir(path_tempdir, 0777)) | |
| 962 { | |
| 963 outf("Failed to create directory: %s", path_tempdir); | |
| 964 goto end; | |
| 965 } | |
| 966 | |
| 967 outf("Unzipping template document '%s' to tempdir: %s", | |
| 968 path_template, path_tempdir); | |
| 969 if (extract_systemf(alloc, "unzip -q -d '%s' '%s'", path_tempdir, path_template)) | |
| 970 { | |
| 971 outf("Failed to unzip %s into %s", | |
| 972 path_template, path_tempdir); | |
| 973 goto end; | |
| 974 } | |
| 975 | |
| 976 /* Might be nice to iterate through all items in path_tempdir, but for now | |
| 977 we look at just the items that we know extract_odt_content_item() will | |
| 978 modify. */ | |
| 979 | |
| 980 { | |
| 981 const char *names[] = | |
| 982 { | |
| 983 "content.xml", | |
| 984 "META-INF/manifest.xml", | |
| 985 }; | |
| 986 int names_num = sizeof(names) / sizeof(names[0]); | |
| 987 for (i=0; i<names_num; ++i) | |
| 988 { | |
| 989 const char* name = names[i]; | |
| 990 extract_free(alloc, &path); | |
| 991 extract_free(alloc, &text); | |
| 992 extract_free(alloc, &text2); | |
| 993 if (extract_asprintf(alloc, &path, "%s/%s", path_tempdir, name) < 0) goto end; | |
| 994 if (extract_read_all_path(alloc, path, &text)) goto end; | |
| 995 | |
| 996 outf("before extract_odt_content_item() styles->styles_num=%i", styles->styles_num); | |
| 997 if (extract_odt_content_item( | |
| 998 alloc, | |
| 999 contentss, | |
| 1000 contentss_num, | |
| 1001 styles, | |
| 1002 images, | |
| 1003 name, | |
| 1004 text, | |
| 1005 &text2 | |
| 1006 )) | |
| 1007 { | |
| 1008 outf("extract_odt_content_item() failed"); | |
| 1009 goto end; | |
| 1010 } | |
| 1011 | |
| 1012 outf("after extract_odt_content_item styles->styles_num=%i", styles->styles_num); | |
| 1013 | |
| 1014 { | |
| 1015 const char* text3 = (text2) ? text2 : text; | |
| 1016 if (extract_write_all(text3, strlen(text3), path)) goto end; | |
| 1017 outf("have written to path=%s", path); | |
| 1018 } | |
| 1019 } | |
| 1020 } | |
| 1021 | |
| 1022 /* Copy images into <path_tempdir>/Pictures/. */ | |
| 1023 extract_free(alloc, &path); | |
| 1024 if (extract_asprintf(alloc, &path, "%s/Pictures", path_tempdir) < 0) goto end; | |
| 1025 if (extract_mkdir(path, 0777)) | |
| 1026 { | |
| 1027 outf("Failed to mkdir %s", path); | |
| 1028 goto end; | |
| 1029 } | |
| 1030 for (i=0; i<images->images_num; ++i) | |
| 1031 { | |
| 1032 image_t* image = images->images[i]; | |
| 1033 extract_free(alloc, &path); | |
| 1034 if (extract_asprintf(alloc, &path, "%s/Pictures/%s", path_tempdir, image->name) < 0) goto end; | |
| 1035 if (extract_write_all(image->data, image->data_size, path)) goto end; | |
| 1036 } | |
| 1037 | |
| 1038 outf("Zipping tempdir to create %s", path_out); | |
| 1039 { | |
| 1040 const char* path_out_leaf = strrchr(path_out, '/'); | |
| 1041 if (!path_out_leaf) path_out_leaf = path_out; | |
| 1042 if (extract_systemf(alloc, "cd '%s' && zip -q -r -D '../%s' .", path_tempdir, path_out_leaf)) | |
| 1043 { | |
| 1044 outf("Zip command failed to convert '%s' directory into output file: %s", | |
| 1045 path_tempdir, path_out); | |
| 1046 goto end; | |
| 1047 } | |
| 1048 } | |
| 1049 | |
| 1050 if (!preserve_dir) | |
| 1051 { | |
| 1052 if (extract_remove_directory(alloc, path_tempdir)) goto end; | |
| 1053 } | |
| 1054 | |
| 1055 e = 0; | |
| 1056 | |
| 1057 end: | |
| 1058 outf("e=%i", e); | |
| 1059 extract_free(alloc, &path_tempdir); | |
| 1060 extract_free(alloc, &path); | |
| 1061 extract_free(alloc, &text); | |
| 1062 extract_free(alloc, &text2); | |
| 1063 | |
| 1064 if (e) | |
| 1065 { | |
| 1066 outf("Failed to create %s", path_out); | |
| 1067 } | |
| 1068 return e; | |
| 1069 } |
