Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/gumbo-parser/python/gumbo/gumboc_test.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 # Copyright 2012 Google Inc. All Rights Reserved. | |
| 2 # | |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 # you may not use this file except in compliance with the License. | |
| 5 # You may obtain a copy of the License at | |
| 6 # | |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 # | |
| 9 # Unless required by applicable law or agreed to in writing, software | |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 # See the License for the specific language governing permissions and | |
| 13 # limitations under the License. | |
| 14 # | |
| 15 | |
| 16 """Tests for Gumbo CTypes bindings.""" | |
| 17 | |
| 18 __author__ = 'jdtang@google.com (Jonathan Tang)' | |
| 19 | |
| 20 import StringIO | |
| 21 | |
| 22 import unittest | |
| 23 | |
| 24 import gumboc | |
| 25 | |
| 26 | |
class CtypesTest(unittest.TestCase):
    """Tests for the parse trees and enums exposed by the gumboc bindings.

    Assertions use assertEqual/assertNotEqual; the assertEquals and
    assertNotEquals spellings are deprecated aliases that newer unittest
    versions no longer provide.
    """

    def testWordParse(self):
        # Parse a bare word and walk the tree that comes back:
        # document node -> <html> root -> <head>, <body> -> text node.
        with gumboc.parse('Test') as output:
            doctype_node = output.contents.document.contents
            self.assertEqual(gumboc.NodeType.DOCUMENT, doctype_node.type)
            document = doctype_node.v.document
            self.assertEqual('', document.name)
            self.assertEqual('', document.public_identifier)
            self.assertEqual('', document.system_identifier)

            root = output.contents.root.contents
            self.assertEqual(gumboc.NodeType.ELEMENT, root.type)
            self.assertEqual(gumboc.Tag.HTML, root.tag)
            self.assertEqual(gumboc.Namespace.HTML, root.tag_namespace)
            self.assertEqual(2, len(root.children))

            # The input has no literal <head> tag, so the recorded original
            # start/end tag text is expected to be empty.
            head = root.children[0]
            self.assertEqual(gumboc.NodeType.ELEMENT, head.type)
            self.assertEqual(gumboc.Tag.HEAD, head.tag)
            self.assertEqual('head', head.tag_name)
            self.assertEqual(gumboc.Namespace.HTML, head.tag_namespace)
            self.assertEqual(0, len(head.original_tag))
            self.assertEqual('', str(head.original_end_tag))
            self.assertEqual(0, head.children.length)

            body = root.children[1]
            self.assertNotEqual(body, doctype_node)
            self.assertEqual(gumboc.NodeType.ELEMENT, body.type)
            self.assertEqual(gumboc.Tag.BODY, body.tag)
            self.assertEqual('body', body.tag_name)
            self.assertEqual(1, len(body.children))

            text_node = body.children[0]
            self.assertEqual(gumboc.NodeType.TEXT, text_node.type)
            self.assertEqual('Test', text_node.text)

    def testBufferThatGoesAway(self):
        # The input is read from a StringIO that is closed before the parse
        # tree is entered; the scenario is repeated ten times.
        # NOTE(review): presumably this guards against the tree depending on
        # the lifetime of the caller's input buffer -- confirm against gumboc.
        for _ in range(10):
            source = StringIO.StringIO('<foo bar=quux>1<p>2</foo>')
            parse_tree = gumboc.parse(source.read())
            source.close()
            with parse_tree as output:
                root = output.contents.root.contents
                body = root.children[1]
                foo = body.children[0]
                self.assertEqual(gumboc.NodeType.ELEMENT, foo.type)
                self.assertEqual(gumboc.Tag.UNKNOWN, foo.tag)
                self.assertEqual('<foo bar=quux>', str(foo.original_tag))
                self.assertEqual('', str(foo.original_end_tag))
                self.assertEqual('foo', foo.tag_name.decode('utf-8'))
                self.assertEqual('bar', foo.attributes[0].name)
                self.assertEqual('quux', foo.attributes[0].value)

    def testUnknownTag(self):
        # A tag name gumbo does not know is reported as Tag.UNKNOWN while its
        # original text, name and attributes stay accessible.
        with gumboc.parse('<foo bar=quux>1<p>2</foo>') as output:
            root = output.contents.root.contents
            body = root.children[1]
            foo = body.children[0]
            self.assertEqual(gumboc.NodeType.ELEMENT, foo.type)
            self.assertEqual(gumboc.Tag.UNKNOWN, foo.tag)
            self.assertEqual('<foo bar=quux>', str(foo.original_tag))
            self.assertEqual('', str(foo.original_end_tag))
            self.assertEqual('foo', foo.tag_name.decode('utf-8'))
            self.assertEqual('bar', foo.attributes[0].name)
            self.assertEqual('quux', foo.attributes[0].value)

    def testSarcasm(self):
        # An unknown element nested between known ones keeps both its
        # original start tag and its original end tag.
        with gumboc.parse('<div><sarcasm><div></div></sarcasm></div>') as output:
            root = output.contents.root.contents
            body = root.children[1]
            div = body.children[0]
            sarcasm = div.children[0]
            self.assertEqual(gumboc.NodeType.ELEMENT, sarcasm.type)
            self.assertEqual(gumboc.Tag.UNKNOWN, sarcasm.tag)
            self.assertEqual('<sarcasm>', str(sarcasm.original_tag))
            self.assertEqual('</sarcasm>', str(sarcasm.original_end_tag))
            self.assertEqual('sarcasm', sarcasm.tag_name.decode('utf-8'))

    def testEnums(self):
        # Enum members compare equal to themselves and hash like their
        # underlying value.
        self.assertEqual(gumboc.Tag.A, gumboc.Tag.A)
        self.assertEqual(hash(gumboc.Tag.A.value), hash(gumboc.Tag.A))

    def testFragment(self):
        # Fragment parsing with a TITLE context in the SVG namespace: the
        # root has exactly one child, a DIV element in the HTML namespace.
        with gumboc.parse(
                '<div></div>',
                fragment_context=gumboc.Tag.TITLE,
                fragment_namespace=gumboc.Namespace.SVG) as output:
            root = output.contents.root.contents
            self.assertEqual(1, len(root.children))
            div = root.children[0]
            self.assertEqual(gumboc.NodeType.ELEMENT, div.type)
            self.assertEqual(gumboc.Tag.DIV, div.tag)
            self.assertEqual(gumboc.Namespace.HTML, div.tag_namespace)
| 120 | |
| 121 | |
| 122 | |
| 123 | |
# Run the whole suite through unittest's command-line runner when this file
# is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
