comparison mupdf-source/thirdparty/gumbo-parser/examples/positions_of_class.cc @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright 2013 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Author: jdtang@google.com (Jonathan Tang)
16 //
17 // Print out the positions of all elements with a certain CSS class.
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include <algorithm>
23 #include <fstream>
24 #include <iostream>
25 #include <string>
26
27 #include "gumbo.h"
28
29 static std::string find_line(
30 const std::string& original_text, const GumboAttribute& attr) {
31 size_t attr_index = attr.original_value.data - original_text.data();
32 size_t begin = original_text.rfind("\n", attr_index) + 1;
33 size_t end = original_text.find("\n", attr_index);
34 if (end != std::string::npos) {
35 end--;
36 } else {
37 end = (size_t) original_text.length() - 1;
38 }
39 end = std::min(end, attr_index + 40);
40 begin = std::max(begin, attr_index - 40);
41 return original_text.substr(begin, end - begin);
42 }
43
44 static void search_for_class(
45 GumboNode* node, const std::string& original_text, const char* cls_name) {
46 if (node->type != GUMBO_NODE_ELEMENT) {
47 return;
48 }
49 GumboAttribute* cls_attr;
50 if ((cls_attr = gumbo_get_attribute(&node->v.element.attributes, "class")) &&
51 strstr(cls_attr->value, cls_name) != NULL) {
52 std::cout << cls_attr->value_start.line << ":"
53 << cls_attr->value_start.column << " - "
54 << find_line(original_text, *cls_attr) << std::endl;
55 }
56
57 GumboVector* children = &node->v.element.children;
58 for (int i = 0; i < children->length; ++i) {
59 search_for_class(
60 static_cast<GumboNode*>(children->data[i]), original_text, cls_name);
61 }
62 }
63
64 int main(int argc, char** argv) {
65 if (argc != 3) {
66 std::cout << "Usage: positions_of_class <html filename> <CSS classname>.\n";
67 exit(EXIT_FAILURE);
68 }
69 const char* filename = argv[1];
70 const char* cls = argv[2];
71
72 std::ifstream in(filename, std::ios::in | std::ios::binary);
73 if (!in) {
74 std::cout << "File " << filename << " not found!\n";
75 exit(EXIT_FAILURE);
76 }
77
78 std::string contents;
79 in.seekg(0, std::ios::end);
80 contents.resize(in.tellg());
81 in.seekg(0, std::ios::beg);
82 in.read(&contents[0], contents.size());
83 in.close();
84
85 // If you used contents.c_str(), it'd be harder to match up original
86 // positions, because c_str() creates a copy of the string and you can't do
87 // pointer arithmetic betweent contents.data() and the original_* pointers.
88 GumboOutput* output = gumbo_parse_with_options(
89 &kGumboDefaultOptions, contents.data(), contents.length());
90 search_for_class(output->root, contents, cls);
91 gumbo_destroy_output(&kGumboDefaultOptions, output);
92 }