Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/extract/src/docx.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/extract/src/docx.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,85 @@ +#ifndef ARTIFEX_EXTRACT_DOCX_H +#define ARTIFEX_EXTRACT_DOCX_H + +/* Only for internal use by extract code. */ + +/* Things for creating docx files. */ + +/* + Make *o_content point to a string containing all paragraphs, images and + tables (tables as of 2021-07-22) in *document in docx XML format. + + This string can be passed to extract_docx_content_item() or + extract_docx_write_template() to be inserted into a docx archive's + word/document.xml. +*/ +int extract_document_to_docx_content( + extract_alloc_t *alloc, + document_t *document, + int spacing, + int rotation, + int images, + extract_astring_t *content); + + +/* + Creates a new docx file using a provided template document. + + Uses the 'zip' and 'unzip' commands internally. + + contentss + contentss_num + Content to be inserted into word/document.xml. + document + . + images + Information about images. + path_template + Name of docx file to use as a template. + path_out + Name of docx file to create. Must not contain single-quote, double quote, + space or ".." sequence - these will force EINVAL error because they could + make internal shell commands unsafe. + preserve_dir + If true, we don't delete the temporary directory <path_out>.dir containing + unzipped docx content. +*/ +int extract_docx_write_template( + extract_alloc_t *alloc, + extract_astring_t *contentss, + int contentss_num, + images_t *images, + const char *path_template, + const char *path_out, + int preserve_dir); + + +/* + Determine content of <name> in docx archive. + + content + content_length + Text to insert if <name> is word/document.xml. + images + Information about images. If <name> is word/document.xml we insert + relationship information mapping from image ids to image names; + <text> should already contain reference ids for images. If <name> is + [Content_Types].xml we insert information about image types. + name + Path within the docx zip archive. + text + Content of <name> in template docx file. + text2 + Out-param. Set to NULL if <text> should be used unchanged. Otherwise set + to point to desired text, allocated with malloc() which caller should free. +*/ +int extract_docx_content_item( + extract_alloc_t *alloc, + extract_astring_t *contentss, + int contentss_num, + images_t *images, + const char *name, + const char *text, + char **text2); + +#endif
