Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/zint/docs/zint_org_uk.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:43:07 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/zint/docs/zint_org_uk.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,406 @@
+# This script takes the output from pandoc and converts it into the format needed by
+# the website at Zint.org.uk
+#
+# Warning: This code is ugly... but it saves days of manual effort updating the website.
+#
+# Copyright (C) 2022 <rstuart114@gmail.com>
+
+# Works out which tags should influence indentation and puts them on their own line
+def isolate_tag(tag):
+    global stage
+    
+    indentable_tag = True
+    for keyword in indent_skip:
+        if keyword in tag:
+            indentable_tag = False
+    
+    if '</' in tag:
+        # Close tag
+        if (indentable_tag):
+            stage += "\n"
+            stage += tag
+            stage += "\n"
+        else:
+            stage += tag
+    else:
+        # Open tag
+        if (indentable_tag):
+            stage += "\n"
+            stage += tag
+            stage += "\n"
+        else:
+            stage += tag
+
+# Add the right amount of indendation (indentation X 4 spaces)
+def add_indent():
+    global indentation
+    retval = ""
+    
+    for i in range(0,indentation):
+        retval += "    "
+        
+    return retval
+
+# Apply indentation to text
+def with_indent(text):
+    global indentation
+    retval = ""
+    d = ''
+    
+    for c in text:
+        if d == '\n':
+            retval += d
+            retval += add_indent()
+        else:
+            retval += d
+        d = c
+        
+    retval += d
+            
+    return retval
+
+# Read file and pull some tags onto their own lines for later processing
+manual = ""
+tag = False
+tag_buffer = ""
+text_buffer = ""
+stage = ""
+indent_skip = ['img', 'code', 'pre', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', '<a', '</a', 'sup', '<col', '</col', '<hr', 'div']
+
+print("Reading... manual.html")
+with open('manual.html') as f:
+    manual = f.read()
+    
+    for c in manual:
+        if c == '<':
+            stage += text_buffer
+            tag = True
+            tag_buffer = ""
+        
+        if (tag):
+            tag_buffer += c
+        else:
+            text_buffer += c
+            
+        if c == '>':
+            tag_buffer = tag_buffer.replace("\n", " ")
+            isolate_tag(tag_buffer)
+            tag = False
+            text_buffer = ""
+            
+f.close()
+manual = stage
+stage = ""
+
+print("Adjusting HTML")
+# Change the guts of the HTML tags
+in_dd = False
+to_remove = False
+remove_next = False
+span_literal = False
+for c in manual:
+    if c == '<':
+        # Remove "{#tbl:" table identifiers
+        if '{#tbl:' in text_buffer:
+            text_buffer = text_buffer[text_buffer.index('tag=') + 7:-3]
+            text_buffer = text_buffer.replace('\n', ' ')
+            text_buffer = '\n' + text_buffer + '\n'
+            
+        # Remove "{@tabl:" table references
+        if 'tbl:' in text_buffer:
+            text_buffer = ''
+        
+        stage += text_buffer
+        tag = True
+        tag_buffer = ""
+        to_remove = False
+    
+    if (tag):
+        tag_buffer += c
+    else:
+        text_buffer += c
+        
+    if c == '>':
+        # Remove some tags which aren't needed on website
+        if 'span' in tag_buffer:
+            to_remove = True
+            
+        if 'div' in tag_buffer:
+            to_remove = True
+            
+        if '<col' in tag_buffer:
+            to_remove = True
+            
+        if '</col' in tag_buffer:
+            to_remove = True
+            
+        if (remove_next):
+            to_remove = True
+            remove_next = False
+            
+        if ('a href' in tag_buffer) and ('aria-hidden="true"' in tag_buffer):
+            to_remove = True
+            remove_next = True
+            
+        if '<a href="#' in tag_buffer:
+            to_remove = True
+            remove_next = True        
+            
+        # Don't allow <p> and </p> between <dd> and </dd>
+        if (tag_buffer == "<dd>"):
+            in_dd = True
+        if (tag_buffer == "</dd>"):
+            in_dd = False
+            
+        if (in_dd and tag_buffer == '<p>'):
+            to_remove = True
+            
+        if (in_dd and tag_buffer == '</p>'):
+            to_remove = True
+            
+        # Remove attributes for some tags
+        if '<pre' in tag_buffer:
+            tag_buffer = '<pre>'
+            
+        if '<table' in tag_buffer:
+            tag_buffer = '<table>'
+            
+        if '<tr' in tag_buffer:
+            tag_buffer = '<tr>'
+            
+        if '<td' in tag_buffer:
+            tag_buffer = '<td>'
+        
+        if '<th ' in tag_buffer:
+            tag_buffer = '<th>'
+            
+        # Bump all headers up one level
+        tag_buffer = tag_buffer.replace('<h6', '<h7')
+        tag_buffer = tag_buffer.replace('</h6', '</h7')
+        tag_buffer = tag_buffer.replace('<h5', '<h6')
+        tag_buffer = tag_buffer.replace('</h5', '</h6')
+        tag_buffer = tag_buffer.replace('<h4', '<h5')
+        tag_buffer = tag_buffer.replace('</h4', '</h5')
+        tag_buffer = tag_buffer.replace('<h3', '<h4')
+        tag_buffer = tag_buffer.replace('</h3', '</h4')
+        tag_buffer = tag_buffer.replace('<h2', '<h3')
+        tag_buffer = tag_buffer.replace('</h2', '</h3')
+        tag_buffer = tag_buffer.replace('<h1', '<h2')
+        tag_buffer = tag_buffer.replace('</h1', '</h2')
+        
+        # Change class names for code snippets
+        tag_buffer = tag_buffer.replace('class="sourceCode bash"', 'class="language-bash"')
+        tag_buffer = tag_buffer.replace('class="sourceCode c"', 'class="language-cpp"')
+        
+        # Change location of images
+        tag_buffer = tag_buffer.replace('src="images/', 'src="/images/manual/')
+        
+        # Change <code> without language to <span>
+        if tag_buffer == '<code>':
+            tag_buffer = '<span class="literal">'
+            span_literal = True
+            
+        if tag_buffer == '</code>' and span_literal:
+            tag_buffer = '</span>'
+            span_literal = False
+
+        if not to_remove:
+            stage += tag_buffer
+        tag = False
+        text_buffer = ""
+        
+manual = stage
+stage = ""
+
+print("Removing empty lines")
+# Remove blank lines unless in between <pre> and </pre>
+last_char = ''
+in_pre = False
+for c in manual:
+    if c == '<':
+        tag = True
+        tag_buffer = ""
+    
+    if (tag):
+        tag_buffer += c
+    else:
+        text_buffer += c
+        
+    if c == '>':
+        if ("<pre" in tag_buffer):
+            in_pre = True
+        if ("</pre" in tag_buffer):
+            in_pre = False
+        tag = False
+        text_buffer = ""
+    
+    if c == '\n':
+        if (last_char != '\n') or (in_pre == True):
+            stage += c
+    else:
+        stage += c
+    last_char = c
+            
+manual = stage
+stage = ""
+
+print("Applying indentation")
+# Indent the code to make it easier to read
+indentation = 1
+in_pre = False
+paragraph_block = False
+document_start = True
+chapter_six = False
+last_char = ''
+for c in manual:
+    if c == '<':
+        #Fix 'floating' full stops
+        text_buffer = text_buffer.replace(' . ', '. ')
+        
+        # Apply indentation to text
+        if in_pre:
+            stage += text_buffer
+        else:
+            stage += with_indent(text_buffer)
+        tag = True
+        tag_buffer = ""
+    
+    if (tag):
+        tag_buffer += c
+    else:
+        # Strip '{}' from already removed table references
+        if c == '}' and last_char == '{':
+            text_buffer = text_buffer[:-1]
+        else:
+            text_buffer += c
+        last_char = c
+        
+    if c == '>':
+        indentable_tag = True
+        for keyword in indent_skip:
+            if keyword in tag_buffer:
+                indentable_tag = False
+        
+        # Protect the indentation in <pre> segments
+        if ('<pre' in tag_buffer):
+            in_pre = True
+        if ('</pre' in tag_buffer):
+            in_pre = False
+            
+        # Chapter 6 requires special treatment - detect beginning and end
+        if ('id="types-of-symbology"' in tag_buffer):
+            chapter_six = True
+        if ('id="legal-and-version-information"' in tag_buffer):
+            chapter_six = False
+        
+        if '</' in tag_buffer:
+            # Close tag
+            if (indentable_tag):
+                indentation -= 1
+                stage += add_indent()
+                stage += tag_buffer
+            else:
+                if text_buffer.endswith('\n'):
+                    stage += add_indent()
+                stage += tag_buffer
+        else:
+            # Split into sections
+            if (indentation == 1) and ('<p' in tag_buffer):
+                if not paragraph_block:
+                    if document_start:
+                        document_start = False
+                    else:
+                        stage += '</section>\n'
+                    stage += '<section class="container">\n'
+                    paragraph_block = True
+                    
+            # Handle headers but also decide where to split into multiple HTML files and mark with <page>
+            if (indentation == 1):
+                if ('<h2' in tag_buffer):
+                    if document_start:
+                        document_start = False
+                        stage += '<section class="container">\n'
+                        paragraph_block = True
+                    else:
+                        stage += '</section>\n'
+                        stage += '<page>\n'
+                        stage += '<section class="container">\n'
+                        paragraph_block = True
+                elif ('<h3' in tag_buffer) and chapter_six:
+                        stage += '</section>\n'
+                        stage += '<page>\n'
+                        stage += '<section class="container">\n'
+                        paragraph_block = True
+                elif ('<h' in tag_buffer):
+                    if not paragraph_block:
+                        stage += '</section>\n'
+                        stage += '<section class="container">\n'
+                        paragraph_block = True
+                        
+            # <dl> section has it's own class
+            if (indentation == 1) and ('<dl' in tag_buffer):
+                stage += '</section>\n'
+                stage += '<section class="definition-list container">\n'
+                paragraph_block = False
+                
+            # <table> section has it's own class
+            if (indentation == 1) and ('<table' in tag_buffer):
+                stage += '</section>\n'
+                stage += '<section class="table">\n'
+                paragraph_block = False
+            
+            # Open tag
+            if (indentable_tag):
+                stage += add_indent()
+                stage += tag_buffer
+                indentation += 1
+            else:
+                if text_buffer.endswith('\n'):
+                    stage += add_indent()
+                stage += tag_buffer
+        tag = False
+        text_buffer = ""
+
+stage += '\n</section>\n'
+manual = stage
+stage = ""
+
+# Remove <h2> data and split into output files
+out_filenames = ['chapter1.html', 'chapter2.html', 'chapter3.html', 'chapter4.html', 'chapter5.html',
+                 'chapter6.0.html', 'chapter6.1.html', 'chapter6.2.html', 'chapter6.3.html', 'chapter6.4.html',
+                 'chapter6.5.html', 'chapter6.6.html', 'chapter6.7.html', 'chapter7.html', 'appendixa.html', 'appendixb.html']
+page = 0
+print("Writing... ", out_filenames[page])
+f = open(out_filenames[page], "w")
+h2_tag = False
+for c in manual:
+    if c == '<':
+        if h2_tag == False:
+            stage += text_buffer
+        tag = True
+        tag_buffer = ""
+    
+    if (tag):
+        tag_buffer += c
+    else:
+        text_buffer += c
+        
+    if c == '>':
+        if '<h2' in tag_buffer:
+            h2_tag = True
+        elif '</h2' in tag_buffer:
+            h2_tag = False
+        elif tag_buffer == '<page>':
+            f.write(stage)
+            f.close()
+            stage = ""
+            page += 1
+            print("Writing... ", out_filenames[page])
+            f = open(out_filenames[page], "w")
+        else:
+            stage += tag_buffer
+        tag = False
+        text_buffer = ""
+
+f.write(stage)
+f.close()
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children