comparison mupdf-source/thirdparty/gumbo-parser/python/gumbo/gumboc_test.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 # Copyright 2012 Google Inc. All Rights Reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 #
15
16 """Tests for Gumbo CTypes bindings."""
17
18 __author__ = 'jdtang@google.com (Jonathan Tang)'
19
20 import StringIO
21
22 import unittest
23
24 import gumboc
25
26
27 class CtypesTest(unittest.TestCase):
28 def testWordParse(self):
29 with gumboc.parse('Test') as output:
30 doctype_node = output.contents.document.contents
31 self.assertEquals(gumboc.NodeType.DOCUMENT, doctype_node.type)
32 document = doctype_node.v.document
33 self.assertEquals('', document.name)
34 self.assertEquals('', document.public_identifier)
35 self.assertEquals('', document.system_identifier)
36
37 root = output.contents.root.contents
38 self.assertEquals(gumboc.NodeType.ELEMENT, root.type)
39 self.assertEquals(gumboc.Tag.HTML, root.tag)
40 self.assertEquals(gumboc.Namespace.HTML, root.tag_namespace)
41 self.assertEquals(2, len(root.children))
42
43 head = root.children[0]
44 self.assertEquals(gumboc.NodeType.ELEMENT, head.type)
45 self.assertEquals(gumboc.Tag.HEAD, head.tag)
46 self.assertEquals('head', head.tag_name)
47 self.assertEquals(gumboc.Namespace.HTML, head.tag_namespace)
48 self.assertEquals(0, len(head.original_tag))
49 self.assertEquals('', str(head.original_end_tag))
50 self.assertEquals(0, head.children.length)
51
52 body = root.children[1]
53 self.assertNotEquals(body, doctype_node)
54 self.assertEquals(gumboc.NodeType.ELEMENT, body.type)
55 self.assertEquals(gumboc.Tag.BODY, body.tag)
56 self.assertEquals('body', body.tag_name)
57 self.assertEquals(1, len(body.children))
58
59 text_node = body.children[0]
60 self.assertEquals(gumboc.NodeType.TEXT, text_node.type)
61 self.assertEquals('Test', text_node.text)
62
63 def testBufferThatGoesAway(self):
64 for i in range(10):
65 source = StringIO.StringIO('<foo bar=quux>1<p>2</foo>')
66 parse_tree = gumboc.parse(source.read())
67 source.close()
68 with parse_tree as output:
69 root = output.contents.root.contents
70 body = root.children[1]
71 foo = body.children[0]
72 self.assertEquals(gumboc.NodeType.ELEMENT, foo.type)
73 self.assertEquals(gumboc.Tag.UNKNOWN, foo.tag)
74 self.assertEquals('<foo bar=quux>', str(foo.original_tag))
75 self.assertEquals('', str(foo.original_end_tag))
76 self.assertEquals('foo', foo.tag_name.decode('utf-8'))
77 self.assertEquals('bar', foo.attributes[0].name)
78 self.assertEquals('quux', foo.attributes[0].value)
79
80 def testUnknownTag(self):
81 with gumboc.parse('<foo bar=quux>1<p>2</foo>') as output:
82 root = output.contents.root.contents
83 body = root.children[1]
84 foo = body.children[0]
85 self.assertEquals(gumboc.NodeType.ELEMENT, foo.type)
86 self.assertEquals(gumboc.Tag.UNKNOWN, foo.tag)
87 self.assertEquals('<foo bar=quux>', str(foo.original_tag))
88 self.assertEquals('', str(foo.original_end_tag))
89 self.assertEquals('foo', foo.tag_name.decode('utf-8'))
90 self.assertEquals('bar', foo.attributes[0].name)
91 self.assertEquals('quux', foo.attributes[0].value)
92
93 def testSarcasm(self):
94 with gumboc.parse('<div><sarcasm><div></div></sarcasm></div>') as output:
95 root = output.contents.root.contents
96 body = root.children[1]
97 div = body.children[0]
98 sarcasm = div.children[0]
99 self.assertEquals(gumboc.NodeType.ELEMENT, sarcasm.type)
100 self.assertEquals(gumboc.Tag.UNKNOWN, sarcasm.tag)
101 self.assertEquals('<sarcasm>', str(sarcasm.original_tag))
102 self.assertEquals('</sarcasm>', str(sarcasm.original_end_tag))
103 self.assertEquals('sarcasm', sarcasm.tag_name.decode('utf-8'))
104
105 def testEnums(self):
106 self.assertEquals(gumboc.Tag.A, gumboc.Tag.A)
107 self.assertEquals(hash(gumboc.Tag.A.value), hash(gumboc.Tag.A))
108
109 def testFragment(self):
110 with gumboc.parse(
111 '<div></div>',
112 fragment_context=gumboc.Tag.TITLE,
113 fragment_namespace=gumboc.Namespace.SVG) as output:
114 root = output.contents.root.contents
115 self.assertEquals(1, len(root.children))
116 div = root.children[0]
117 self.assertEquals(gumboc.NodeType.ELEMENT, div.type)
118 self.assertEquals(gumboc.Tag.DIV, div.tag)
119 self.assertEquals(gumboc.Namespace.HTML, div.tag_namespace)
120
121
122
123
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()