comparison mupdf-source/thirdparty/zint/docs/zint_org_uk.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 # This script takes the output from pandoc and converts it into the format needed by
2 # the website at Zint.org.uk
3 #
4 # Warning: This code is ugly... but it saves days of manual effort updating the website.
5 #
6 # Copyright (C) 2022 <rstuart114@gmail.com>
7
# Works out which tags should influence indentation and puts them on their own line
def isolate_tag(tag):
    """Append *tag* to the module-level ``stage`` buffer.

    A tag that contains any of the ``indent_skip`` keywords is appended
    unchanged, so it stays inline with the surrounding text. Every other
    tag is appended with a newline before and after it, which puts it on
    a line of its own for the later passes.

    Opening and closing tags are treated identically; the previous
    implementation branched on ``'</'`` but both branches did the same work.

    Args:
        tag: The complete tag text, from ``<`` to ``>`` inclusive.
    """
    global stage

    # Plain substring match: a keyword such as 'pre' matches both '<pre>'
    # and '</pre>', and also any other tag that merely contains those letters.
    if any(keyword in tag for keyword in indent_skip):
        stage += tag
    else:
        stage += "\n" + tag + "\n"
33
# Add the right amount of indentation (indentation X 4 spaces)
def add_indent(level=None):
    """Return the leading whitespace for one line of output.

    Args:
        level: Number of indentation steps to produce. When omitted, the
            current value of the module-level ``indentation`` counter is
            used, which is how the existing call sites invoke this function.

    Returns:
        ``level`` repetitions of a four-space step. A zero or negative
        level yields an empty string.
    """
    if level is None:
        level = indentation

    # One step is four spaces, as stated in the header comment above.
    return "    " * level
43
# Apply indentation to text
def with_indent(text):
    """Return *text* with the current indent inserted after its newlines.

    Every newline except one that is the very last character of *text* is
    followed by the string returned by ``add_indent()``. A trailing newline
    is left bare, so that whatever is emitted next can decide its own
    indentation.

    Args:
        text: The run of text to indent; may be empty.

    Returns:
        The indented text, or an empty string when *text* is empty.
    """
    if not text:
        return ""

    pad = add_indent()
    # The final character is set aside so that a newline in that position
    # is not followed by padding.
    return text[:-1].replace("\n", "\n" + pad) + text[-1]
61
# Read file and pull some tags onto their own lines for later processing
#
# The names below are module-level state shared by every pass in this script:
#   manual      - the document text consumed by the current pass
#   stage       - the output being built by the current pass
#   tag         - True while the scanner is between '<' and '>'
#   tag_buffer  - characters of the tag currently being read
#   text_buffer - characters seen since the last '>'
#   indent_skip - substrings marking tags that are left inline
manual = ""
tag = False
tag_buffer = ""
text_buffer = ""
stage = ""
indent_skip = ['img', 'code', 'pre', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', '<a', '</a', 'sup', '<col', '</col', '<hr', 'div']

print("Reading... manual.html")
# The context manager closes the file; no separate close() call is needed.
with open('manual.html') as f:
    manual = f.read()

for c in manual:
    if c == '<':
        # Flush the text that preceded this tag. The buffer is only
        # cleared when the tag ends (at '>').
        stage += text_buffer
        tag = True
        tag_buffer = ""

    if (tag):
        tag_buffer += c
    else:
        text_buffer += c

    if c == '>':
        # Tags that span several lines are joined onto one line first
        tag_buffer = tag_buffer.replace("\n", " ")
        isolate_tag(tag_buffer)
        tag = False
        text_buffer = ""

# Text after the final '>' is not flushed here; it stays in text_buffer.
manual = stage
stage = ""
94
print("Adjusting HTML")
# Change the guts of the HTML tags
#
# Second pass over the document: text runs are filtered when a '<' is met,
# and each completed tag is either dropped (to_remove) or rewritten before
# being appended to stage. text_buffer is not reset before the loop, so it
# starts with whatever the previous pass left in it.
in_dd = False           # True between an exact "<dd>" and an exact "</dd>"
to_remove = False       # The tag currently being read will not be emitted
remove_next = False     # The tag after the current one will not be emitted
span_literal = False    # A bare <code> was rewritten and awaits its </code>
for c in manual:
    if c == '<':
        # Remove "{#tbl:" table identifiers
        if '{#tbl:' in text_buffer:
            # Keep the text from 7 characters past the start of 'tag=' up
            # to 3 characters before the end. str.index raises ValueError
            # if 'tag=' is absent.
            # NOTE(review): presumably this isolates the caption text; the
            # exact marker layout is not visible here - confirm.
            text_buffer = text_buffer[text_buffer.index('tag=') + 7:-3]
            text_buffer = text_buffer.replace('\n', ' ')
            text_buffer = '\n' + text_buffer + '\n'

        # Remove "{@tabl:" table references
        # (any text run still containing 'tbl:' is discarded entirely; this
        # test also sees the text as rewritten just above)
        if 'tbl:' in text_buffer:
            text_buffer = ''

        stage += text_buffer
        tag = True
        tag_buffer = ""
        to_remove = False

    if (tag):
        tag_buffer += c
    else:
        text_buffer += c

    if c == '>':
        # Remove some tags which aren't needed on website
        # (substring tests, so opening and closing forms both match)
        if 'span' in tag_buffer:
            to_remove = True

        if 'div' in tag_buffer:
            to_remove = True

        if '<col' in tag_buffer:
            to_remove = True

        if '</col' in tag_buffer:
            to_remove = True

        # The previous tag asked for this one to be dropped as well
        if (remove_next):
            to_remove = True
            remove_next = False

        # Drop these anchors together with the tag that follows them
        # NOTE(review): presumably the following tag is the matching </a> - confirm
        if ('a href' in tag_buffer) and ('aria-hidden="true"' in tag_buffer):
            to_remove = True
            remove_next = True

        if '<a href="#' in tag_buffer:
            to_remove = True
            remove_next = True

        # Don't allow <p> and </p> between <dd> and </dd>
        if (tag_buffer == "<dd>"):
            in_dd = True
        if (tag_buffer == "</dd>"):
            in_dd = False

        if (in_dd and tag_buffer == '<p>'):
            to_remove = True

        if (in_dd and tag_buffer == '</p>'):
            to_remove = True

        # Remove attributes for some tags
        if '<pre' in tag_buffer:
            tag_buffer = '<pre>'

        if '<table' in tag_buffer:
            tag_buffer = '<table>'

        if '<tr' in tag_buffer:
            tag_buffer = '<tr>'

        if '<td' in tag_buffer:
            tag_buffer = '<td>'

        # The trailing space means only a <th> carrying attributes matches
        if '<th ' in tag_buffer:
            tag_buffer = '<th>'

        # Bump all headers up one level
        # (highest level first, so a tag that has just been renumbered is
        # not matched again by a later replace; h6 becomes h7)
        tag_buffer = tag_buffer.replace('<h6', '<h7')
        tag_buffer = tag_buffer.replace('</h6', '</h7')
        tag_buffer = tag_buffer.replace('<h5', '<h6')
        tag_buffer = tag_buffer.replace('</h5', '</h6')
        tag_buffer = tag_buffer.replace('<h4', '<h5')
        tag_buffer = tag_buffer.replace('</h4', '</h5')
        tag_buffer = tag_buffer.replace('<h3', '<h4')
        tag_buffer = tag_buffer.replace('</h3', '</h4')
        tag_buffer = tag_buffer.replace('<h2', '<h3')
        tag_buffer = tag_buffer.replace('</h2', '</h3')
        tag_buffer = tag_buffer.replace('<h1', '<h2')
        tag_buffer = tag_buffer.replace('</h1', '</h2')

        # Change class names for code snippets
        tag_buffer = tag_buffer.replace('class="sourceCode bash"', 'class="language-bash"')
        tag_buffer = tag_buffer.replace('class="sourceCode c"', 'class="language-cpp"')

        # Change location of images
        tag_buffer = tag_buffer.replace('src="images/', 'src="/images/manual/')

        # Change <code> without language to <span>
        # (the 'span' removal test above has already run, so the tags
        # produced here are kept)
        if tag_buffer == '<code>':
            tag_buffer = '<span class="literal">'
            span_literal = True

        # Only a </code> that follows a rewritten <code> becomes </span>
        if tag_buffer == '</code>' and span_literal:
            tag_buffer = '</span>'
            span_literal = False

        if not to_remove:
            stage += tag_buffer
        tag = False
        text_buffer = ""

# Hand the result to the next pass
manual = stage
stage = ""
214
print("Removing empty lines")
# Remove blank lines unless in between <pre> and </pre>
#
# Characters are copied to stage one at a time. A newline that directly
# follows another newline is skipped, except while in_pre is set.
last_char = ''      # The character handled on the previous iteration
in_pre = False      # True after a tag containing "<pre" until one containing "</pre"
for c in manual:
    if c == '<':
        tag = True
        tag_buffer = ""

    if (tag):
        tag_buffer += c
    else:
        # Nothing in this pass reads text_buffer, but whatever follows the
        # final '>' is still in it when the next pass starts.
        text_buffer += c

    if c == '>':
        if ("<pre" in tag_buffer):
            in_pre = True
        if ("</pre" in tag_buffer):
            in_pre = False
        tag = False
        text_buffer = ""

    if c == '\n':
        # Keep the first newline of a run; keep every newline inside <pre>
        if (last_char != '\n') or (in_pre == True):
            stage += c
    else:
        stage += c
    last_char = c

# Hand the result to the next pass
manual = stage
stage = ""
246
print("Applying indentation")
# Indent the code to make it easier to read
#
# This pass also wraps the content in <section> elements and inserts <page>
# markers, which the final pass uses to split the output into files.
indentation = 1             # Current nesting depth, read by add_indent()
in_pre = False              # True while inside <pre>; that text is copied as-is
paragraph_block = False     # True while the open section is a "container" one
document_start = True       # True until the first section has been opened
chapter_six = False         # True between the two chapter marker ids below
last_char = ''
for c in manual:
    if c == '<':
        # Fix 'floating' full stops
        text_buffer = text_buffer.replace(' . ', '. ')

        # Apply indentation to text
        if in_pre:
            stage += text_buffer
        else:
            stage += with_indent(text_buffer)
        tag = True
        tag_buffer = ""

    if (tag):
        tag_buffer += c
    else:
        # Strip '{}' from already removed table references
        # (the '{' is already in the buffer, so drop it and skip the '}')
        if c == '}' and last_char == '{':
            text_buffer = text_buffer[:-1]
        else:
            text_buffer += c
    # NOTE(review): the nesting of this statement could not be recovered
    # from the flattened source. It is placed at loop level, as in the
    # blank-line pass - confirm against the original file.
    last_char = c

    if c == '>':
        # Same substring test as isolate_tag(): tags containing any
        # indent_skip keyword do not change the nesting depth
        indentable_tag = True
        for keyword in indent_skip:
            if keyword in tag_buffer:
                indentable_tag = False

        # Protect the indentation in <pre> segments
        if ('<pre' in tag_buffer):
            in_pre = True
        if ('</pre' in tag_buffer):
            in_pre = False

        # Chapter 6 requires special treatment - detect beginning and end
        if ('id="types-of-symbology"' in tag_buffer):
            chapter_six = True
        if ('id="legal-and-version-information"' in tag_buffer):
            chapter_six = False

        if '</' in tag_buffer:
            # Close tag
            if (indentable_tag):
                # Step back out before emitting, so the closing tag lines
                # up with its opening tag
                indentation -= 1
                stage += add_indent()
                stage += tag_buffer
            else:
                # An inline tag is indented only when it starts a new line
                if text_buffer.endswith('\n'):
                    stage += add_indent()
                stage += tag_buffer
        else:
            # Split into sections
            # ('<p' is a substring test, so any tag text containing it matches)
            if (indentation == 1) and ('<p' in tag_buffer):
                if not paragraph_block:
                    # There is no previous section to close at the very start
                    if document_start:
                        document_start = False
                    else:
                        stage += '</section>\n'
                    stage += '<section class="container">\n'
                    paragraph_block = True

            # Handle headers but also decide where to split into multiple HTML files and mark with <page>
            if (indentation == 1):
                if ('<h2' in tag_buffer):
                    if document_start:
                        # The first <h2> opens the first section without a <page> marker
                        document_start = False
                        stage += '<section class="container">\n'
                        paragraph_block = True
                    else:
                        stage += '</section>\n'
                        stage += '<page>\n'
                        stage += '<section class="container">\n'
                        paragraph_block = True
                elif ('<h3' in tag_buffer) and chapter_six:
                    # While chapter_six is set, each <h3> also starts a new page
                    stage += '</section>\n'
                    stage += '<page>\n'
                    stage += '<section class="container">\n'
                    paragraph_block = True
                elif ('<h' in tag_buffer):
                    # Any other tag containing '<h' starts a container
                    # section only if one is not already open
                    if not paragraph_block:
                        stage += '</section>\n'
                        stage += '<section class="container">\n'
                        paragraph_block = True

            # <dl> section has its own class
            if (indentation == 1) and ('<dl' in tag_buffer):
                stage += '</section>\n'
                stage += '<section class="definition-list container">\n'
                paragraph_block = False

            # <table> section has its own class
            if (indentation == 1) and ('<table' in tag_buffer):
                stage += '</section>\n'
                stage += '<section class="table">\n'
                paragraph_block = False

            # Open tag
            if (indentable_tag):
                stage += add_indent()
                stage += tag_buffer
                indentation += 1
            else:
                # An inline tag is indented only when it starts a new line
                if text_buffer.endswith('\n'):
                    stage += add_indent()
                stage += tag_buffer
        tag = False
        text_buffer = ""

# Close the section that is still open at the end of the document
stage += '\n</section>\n'
manual = stage
stage = ""
367
# Remove <h2> data and split into output files
#
# One file name is consumed for the start of the document and one more for
# every <page> marker found in it.
# NOTE(review): if the document holds more <page> markers than there are
# remaining names, out_filenames[page] raises IndexError.
out_filenames = ['chapter1.html', 'chapter2.html', 'chapter3.html', 'chapter4.html', 'chapter5.html',
    'chapter6.0.html', 'chapter6.1.html', 'chapter6.2.html', 'chapter6.3.html', 'chapter6.4.html',
    'chapter6.5.html', 'chapter6.6.html', 'chapter6.7.html', 'chapter7.html', 'appendixa.html', 'appendixb.html']
page = 0            # Index into out_filenames of the file being written
print("Writing... ", out_filenames[page])
f = open(out_filenames[page], "w")
h2_tag = False      # True between an <h2 ...> tag and its </h2>
for c in manual:
    if c == '<':
        # Text inside an <h2> element is not copied to the output
        if h2_tag == False:
            stage += text_buffer
        tag = True
        tag_buffer = ""

    if (tag):
        tag_buffer += c
    else:
        text_buffer += c

    if c == '>':
        # The <h2>, </h2> and <page> tags themselves are never written out
        if '<h2' in tag_buffer:
            h2_tag = True
        elif '</h2' in tag_buffer:
            h2_tag = False
        elif tag_buffer == '<page>':
            # Finish the current file and start the next one
            f.write(stage)
            f.close()
            stage = ""
            page += 1
            print("Writing... ", out_filenames[page])
            f = open(out_filenames[page], "w")
        else:
            stage += tag_buffer
        tag = False
        text_buffer = ""

# Write whatever was collected after the last <page> marker
f.write(stage)
f.close()