Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/gumbo-parser/python/gumbo/gumboc_test.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/gumbo-parser/python/gumbo/gumboc_test.py Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,125 @@ +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Tests for Gumbo CTypes bindings.""" + +__author__ = 'jdtang@google.com (Jonathan Tang)' + +import StringIO + +import unittest + +import gumboc + + +class CtypesTest(unittest.TestCase): + def testWordParse(self): + with gumboc.parse('Test') as output: + doctype_node = output.contents.document.contents + self.assertEquals(gumboc.NodeType.DOCUMENT, doctype_node.type) + document = doctype_node.v.document + self.assertEquals('', document.name) + self.assertEquals('', document.public_identifier) + self.assertEquals('', document.system_identifier) + + root = output.contents.root.contents + self.assertEquals(gumboc.NodeType.ELEMENT, root.type) + self.assertEquals(gumboc.Tag.HTML, root.tag) + self.assertEquals(gumboc.Namespace.HTML, root.tag_namespace) + self.assertEquals(2, len(root.children)) + + head = root.children[0] + self.assertEquals(gumboc.NodeType.ELEMENT, head.type) + self.assertEquals(gumboc.Tag.HEAD, head.tag) + self.assertEquals('head', head.tag_name) + self.assertEquals(gumboc.Namespace.HTML, head.tag_namespace) + self.assertEquals(0, len(head.original_tag)) + self.assertEquals('', str(head.original_end_tag)) + self.assertEquals(0, head.children.length) + + body = root.children[1] + self.assertNotEquals(body, doctype_node) + self.assertEquals(gumboc.NodeType.ELEMENT, body.type) + self.assertEquals(gumboc.Tag.BODY, body.tag) + self.assertEquals('body', body.tag_name) + self.assertEquals(1, len(body.children)) + + text_node = body.children[0] + self.assertEquals(gumboc.NodeType.TEXT, text_node.type) + self.assertEquals('Test', text_node.text) + + def testBufferThatGoesAway(self): + for i in range(10): + source = StringIO.StringIO('<foo bar=quux>1<p>2</foo>') + parse_tree = gumboc.parse(source.read()) + source.close() + with parse_tree as output: + root = output.contents.root.contents + body = root.children[1] + foo = body.children[0] + self.assertEquals(gumboc.NodeType.ELEMENT, foo.type) + self.assertEquals(gumboc.Tag.UNKNOWN, foo.tag) + self.assertEquals('<foo bar=quux>', str(foo.original_tag)) + self.assertEquals('', str(foo.original_end_tag)) + self.assertEquals('foo', foo.tag_name.decode('utf-8')) + self.assertEquals('bar', foo.attributes[0].name) + self.assertEquals('quux', foo.attributes[0].value) + + def testUnknownTag(self): + with gumboc.parse('<foo bar=quux>1<p>2</foo>') as output: + root = output.contents.root.contents + body = root.children[1] + foo = body.children[0] + self.assertEquals(gumboc.NodeType.ELEMENT, foo.type) + self.assertEquals(gumboc.Tag.UNKNOWN, foo.tag) + self.assertEquals('<foo bar=quux>', str(foo.original_tag)) + self.assertEquals('', str(foo.original_end_tag)) + self.assertEquals('foo', foo.tag_name.decode('utf-8')) + self.assertEquals('bar', foo.attributes[0].name) + self.assertEquals('quux', foo.attributes[0].value) + + def testSarcasm(self): + with gumboc.parse('<div><sarcasm><div></div></sarcasm></div>') as output: + root = output.contents.root.contents + body = root.children[1] + div = body.children[0] + sarcasm = div.children[0] + self.assertEquals(gumboc.NodeType.ELEMENT, sarcasm.type) + self.assertEquals(gumboc.Tag.UNKNOWN, sarcasm.tag) + self.assertEquals('<sarcasm>', str(sarcasm.original_tag)) + self.assertEquals('</sarcasm>', str(sarcasm.original_end_tag)) + self.assertEquals('sarcasm', sarcasm.tag_name.decode('utf-8')) + + def testEnums(self): + self.assertEquals(gumboc.Tag.A, gumboc.Tag.A) + self.assertEquals(hash(gumboc.Tag.A.value), hash(gumboc.Tag.A)) + + def testFragment(self): + with gumboc.parse( + '<div></div>', + fragment_context=gumboc.Tag.TITLE, + fragment_namespace=gumboc.Namespace.SVG) as output: + root = output.contents.root.contents + self.assertEquals(1, len(root.children)) + div = root.children[0] + self.assertEquals(gumboc.NodeType.ELEMENT, div.type) + self.assertEquals(gumboc.Tag.DIV, div.tag) + self.assertEquals(gumboc.Namespace.HTML, div.tag_namespace) + + + + +if __name__ == '__main__': + unittest.main()
